From 26c5b828d180976cb50d2ec33d1a4f52c8bc256c Mon Sep 17 00:00:00 2001 From: Brent Gardner Date: Fri, 10 Oct 2025 08:53:47 -0600 Subject: [PATCH 01/30] Add test --- datafusion/sqllogictest/test_files/window.slt | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/datafusion/sqllogictest/test_files/window.slt b/datafusion/sqllogictest/test_files/window.slt index f1a708d84dd3..a3843e2f0585 100644 --- a/datafusion/sqllogictest/test_files/window.slt +++ b/datafusion/sqllogictest/test_files/window.slt @@ -1409,9 +1409,27 @@ SELECT 4144173353 28472563256 20935849039 4076864659 28118515915 24997484146 +query I +SELECT LEAD(c9) OVER (ORDER BY c9) FROM aggregate_test_100 LIMIT 5 +---- +63044568 +141047417 +141680161 +145294611 +225513085 + statement ok set datafusion.optimizer.enable_window_limits = true; +query I +SELECT LEAD(c9) OVER (ORDER BY c9) FROM aggregate_test_100 LIMIT 5 +---- +63044568 +141047417 +141680161 +145294611 +225513085 + query III SELECT c9, From af8572a7e359f82928a5da98de8a579bb3272731 Mon Sep 17 00:00:00 2001 From: Brent Gardner Date: Fri, 10 Oct 2025 16:04:20 -0600 Subject: [PATCH 02/30] Use ROWS instead of RANGE --- .../src/limit_pushdown_past_window.rs | 2 +- datafusion/sqllogictest/test_files/window.slt | 22 +++++++++---------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/datafusion/physical-optimizer/src/limit_pushdown_past_window.rs b/datafusion/physical-optimizer/src/limit_pushdown_past_window.rs index 42285d1bca53..2b21650597cc 100644 --- a/datafusion/physical-optimizer/src/limit_pushdown_past_window.rs +++ b/datafusion/physical-optimizer/src/limit_pushdown_past_window.rs @@ -87,7 +87,7 @@ impl PhysicalOptimizerRule for LimitPushPastWindows { let Some(end_bound) = bound_to_usize(&frame.end_bound) else { return reset(node, &mut latest_max); }; - latest_max = cmp::max(end_bound, latest_max); + latest_max = cmp::max(end_bound + 1, latest_max); } return Ok(Transformed::no(node)); } diff --git a/datafusion/sqllogictest/test_files/window.slt b/datafusion/sqllogictest/test_files/window.slt index a3843e2f0585..7d4e9771e0d7 100644 --- a/datafusion/sqllogictest/test_files/window.slt +++ b/datafusion/sqllogictest/test_files/window.slt @@ -1345,7 +1345,7 @@ physical_plan 02)--GlobalLimitExec: skip=0, fetch=5 03)----BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Field { name: "sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 5 PRECEDING AND 1 FOLLOWING], mode=[Sorted] 04)------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Field { name: "sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING], mode=[Sorted] -05)--------SortExec: TopK(fetch=10), expr=[c9@0 DESC], preserve_partitioning=[false] +05)--------SortExec: TopK(fetch=11), expr=[c9@0 DESC], preserve_partitioning=[false] 06)----------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c9], file_type=csv, has_header=true query III @@ -1388,7 +1388,7 @@ physical_plan 02)--GlobalLimitExec: skip=0, fetch=5 03)----BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Field { name: "sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 5 PRECEDING AND 1 FOLLOWING], mode=[Sorted] 04)------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Field { name: "sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING], mode=[Sorted] -05)--------SortExec: TopK(fetch=10), expr=[c9@0 DESC], preserve_partitioning=[false] +05)--------SortExec: TopK(fetch=11), expr=[c9@0 DESC], preserve_partitioning=[false] 06)----------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c9], file_type=csv, has_header=true # ensure limit pushdown can handle bigger preceding instead of following @@ -1410,7 +1410,7 @@ SELECT 4076864659 28118515915 24997484146 query I -SELECT LEAD(c9) OVER (ORDER BY c9) FROM aggregate_test_100 LIMIT 5 +SELECT LEAD(c9) OVER (ORDER BY c9 ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM aggregate_test_100 LIMIT 5 ---- 63044568 141047417 @@ -1422,7 +1422,7 @@ statement ok set datafusion.optimizer.enable_window_limits = true; query I -SELECT LEAD(c9) OVER (ORDER BY c9) FROM aggregate_test_100 LIMIT 5 +SELECT LEAD(c9) OVER (ORDER BY c9 ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM aggregate_test_100 LIMIT 5 ---- 63044568 141047417 @@ -1466,7 +1466,7 @@ physical_plan 02)--GlobalLimitExec: skip=5, fetch=5 03)----BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Field { name: "sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 5 PRECEDING AND 1 FOLLOWING], mode=[Sorted] 04)------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Field { name: "sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING], mode=[Sorted] -05)--------SortExec: TopK(fetch=15), expr=[c9@0 DESC], preserve_partitioning=[false] +05)--------SortExec: TopK(fetch=16), expr=[c9@0 DESC], preserve_partitioning=[false] 06)----------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c9], file_type=csv, has_header=true query III @@ -1550,7 +1550,7 @@ physical_plan 01)ProjectionExec: expr=[c9@0 as c9, row_number() ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@2 as rn1, row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@1 as rn2] 02)--GlobalLimitExec: skip=0, fetch=5 03)----BoundedWindowAggExec: wdw=[row_number() ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Field { name: "row_number() ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING], mode=[Sorted] -04)------SortExec: TopK(fetch=10), expr=[c9@0 ASC NULLS LAST], preserve_partitioning=[false] +04)------SortExec: TopK(fetch=11), expr=[c9@0 ASC NULLS LAST], preserve_partitioning=[false] 05)--------BoundedWindowAggExec: wdw=[row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Field { name: "row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING], mode=[Sorted] 06)----------SortExec: expr=[c9@0 DESC], preserve_partitioning=[false] 07)------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c9], file_type=csv, has_header=true @@ -1592,7 +1592,7 @@ physical_plan 01)ProjectionExec: expr=[c9@2 as c9, sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@5 as sum1, sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c1 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@3 as sum2, row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@4 as rn2] 02)--GlobalLimitExec: skip=0, fetch=5 03)----BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Field { name: "sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING], mode=[Sorted] -04)------SortExec: TopK(fetch=10), expr=[c9@2 ASC NULLS LAST, c1@0 ASC NULLS LAST, c2@1 ASC NULLS LAST], preserve_partitioning=[false] +04)------SortExec: TopK(fetch=11), expr=[c9@2 ASC NULLS LAST, c1@0 ASC NULLS LAST, c2@1 ASC NULLS LAST], preserve_partitioning=[false] 05)--------BoundedWindowAggExec: wdw=[row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Field { name: "row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING], mode=[Sorted] 06)----------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c1 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Field { name: "sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c1 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING], mode=[Sorted] 07)------------SortExec: expr=[c9@2 DESC, c1@0 DESC], preserve_partitioning=[false] @@ -1761,7 +1761,7 @@ physical_plan 02)--GlobalLimitExec: skip=0, fetch=5 03)----BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Field { name: "sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING], mode=[Sorted] 04)------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Field { name: "sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING], mode=[Sorted] -05)--------SortExec: TopK(fetch=10), expr=[c1@0 ASC NULLS LAST, c9@1 DESC], preserve_partitioning=[false] +05)--------SortExec: TopK(fetch=11), expr=[c1@0 ASC NULLS LAST, c9@1 DESC], preserve_partitioning=[false] 06)----------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c9], file_type=csv, has_header=true @@ -1805,7 +1805,7 @@ physical_plan 02)--GlobalLimitExec: skip=0, fetch=5 03)----BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Field { name: "sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 5 PRECEDING AND 1 FOLLOWING], mode=[Sorted] 04)------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Field { name: "sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING], mode=[Sorted] -05)--------SortExec: TopK(fetch=10), expr=[c1@0 ASC NULLS LAST, c9@1 DESC], preserve_partitioning=[false] +05)--------SortExec: TopK(fetch=11), expr=[c1@0 ASC NULLS LAST, c9@1 DESC], preserve_partitioning=[false] 06)----------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c9], file_type=csv, has_header=true query III @@ -5984,8 +5984,8 @@ physical_plan 01)ProjectionExec: expr=[c1@2 as c1, c2@3 as c2, sum(test.c2) FILTER (WHERE test.c2 >= Int64(2)) ORDER BY [test.c1 ASC NULLS LAST, test.c2 ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@4 as sum1, sum(test.c2) FILTER (WHERE test.c2 >= Int64(2) AND test.c2 < Int64(4) AND test.c1 > Int64(0)) ORDER BY [test.c1 ASC NULLS LAST, test.c2 ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@5 as sum2, count(test.c2) FILTER (WHERE test.c2 >= Int64(2)) ORDER BY [test.c1 ASC NULLS LAST, test.c2 ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@6 as count1, array_agg(test.c2) FILTER (WHERE test.c2 >= Int64(2)) ORDER BY [test.c1 ASC NULLS LAST, test.c2 ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@7 as array_agg1, array_agg(test.c2) FILTER (WHERE test.c2 >= Int64(2) AND test.c2 < Int64(4) AND test.c1 > Int64(0)) ORDER BY [test.c1 ASC NULLS LAST, test.c2 ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@8 as array_agg2] 02)--GlobalLimitExec: skip=0, fetch=5 03)----BoundedWindowAggExec: wdw=[sum(test.c2) FILTER (WHERE test.c2 >= Int64(2)) ORDER BY [test.c1 ASC NULLS LAST, test.c2 ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "sum(test.c2) FILTER (WHERE test.c2 >= Int64(2)) ORDER BY [test.c1 ASC NULLS LAST, test.c2 ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, sum(test.c2) FILTER (WHERE test.c2 >= Int64(2) AND test.c2 < Int64(4) AND test.c1 > Int64(0)) ORDER BY [test.c1 ASC NULLS LAST, test.c2 ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "sum(test.c2) FILTER (WHERE test.c2 >= Int64(2) AND test.c2 < Int64(4) AND test.c1 > Int64(0)) ORDER BY [test.c1 ASC NULLS LAST, test.c2 ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, count(test.c2) FILTER (WHERE test.c2 >= Int64(2)) ORDER BY [test.c1 ASC NULLS LAST, test.c2 ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "count(test.c2) FILTER (WHERE test.c2 >= Int64(2)) ORDER BY [test.c1 ASC NULLS LAST, test.c2 ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, array_agg(test.c2) FILTER (WHERE test.c2 >= Int64(2)) ORDER BY [test.c1 ASC NULLS LAST, test.c2 ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "array_agg(test.c2) FILTER (WHERE test.c2 >= Int64(2)) ORDER BY [test.c1 ASC NULLS LAST, test.c2 ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: List(Field { name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, array_agg(test.c2) FILTER (WHERE test.c2 >= Int64(2) AND test.c2 < Int64(4) AND test.c1 > Int64(0)) ORDER BY [test.c1 ASC NULLS LAST, test.c2 ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "array_agg(test.c2) FILTER (WHERE test.c2 >= Int64(2) AND test.c2 < Int64(4) AND test.c1 > Int64(0)) ORDER BY [test.c1 ASC NULLS LAST, test.c2 ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: List(Field { name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] -04)------SortPreservingMergeExec: [c1@2 ASC NULLS LAST, c2@3 ASC NULLS LAST], fetch=5 -05)--------SortExec: TopK(fetch=5), expr=[c1@2 ASC NULLS LAST, c2@3 ASC NULLS LAST], preserve_partitioning=[true] +04)------SortPreservingMergeExec: [c1@2 ASC NULLS LAST, c2@3 ASC NULLS LAST], fetch=6 +05)--------SortExec: TopK(fetch=6), expr=[c1@2 ASC NULLS LAST, c2@3 ASC NULLS LAST], preserve_partitioning=[true] 06)----------ProjectionExec: expr=[__common_expr_3@0 as __common_expr_1, __common_expr_3@0 AND c2@2 < 4 AND c1@1 > 0 as __common_expr_2, c1@1 as c1, c2@2 as c2] 07)------------ProjectionExec: expr=[c2@1 >= 2 as __common_expr_3, c1@0 as c1, c2@1 as c2] 08)--------------DataSourceExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/core/tests/data/partitioned_csv/partition-0.csv], [WORKSPACE_ROOT/datafusion/core/tests/data/partitioned_csv/partition-1.csv], [WORKSPACE_ROOT/datafusion/core/tests/data/partitioned_csv/partition-2.csv], [WORKSPACE_ROOT/datafusion/core/tests/data/partitioned_csv/partition-3.csv]]}, projection=[c1, c2], file_type=csv, has_header=false From 4979f87915bd322663982c48d6061861d3e9b2f3 Mon Sep 17 00:00:00 2001 From: Brent Gardner Date: Sat, 11 Oct 2025 11:21:16 -0600 Subject: [PATCH 03/30] Fix a test --- datafusion/sqllogictest/test_files/window.slt | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/datafusion/sqllogictest/test_files/window.slt b/datafusion/sqllogictest/test_files/window.slt index 7d4e9771e0d7..069bb8c85559 100644 --- a/datafusion/sqllogictest/test_files/window.slt +++ b/datafusion/sqllogictest/test_files/window.slt @@ -5481,7 +5481,7 @@ order by c1, c2, rank; query TT explain select c1, c2, rank1, rank2 from ( - select c1, c2, rank() over (partition by c1 order by c2) as rank1, + select c1, c2, rank() over (partition by c1 order by c2) as rank1, rank() over (partition by c2, c1 order by c1) as rank2 from t1 ) @@ -5515,7 +5515,7 @@ physical_plan query IIII select c1, c2, rank1, rank2 from ( - select c1, c2, rank() over (partition by c1 order by c2) as rank1, + select c1, c2, rank() over (partition by c1 order by c2) as rank1, rank() over (partition by c2, c1 order by c1) as rank2 from t1 ) @@ -5532,7 +5532,7 @@ order by c1, c2, rank1, rank2; query TT explain select c1, c2, rank1, rank2 from ( - select c1, c2, rank() over (partition by c1 order by c2) as rank1, + select c1, c2, rank() over (partition by c1 order by c2) as rank1, rank() over (partition by c2, c1 order by c1) as rank2 from t1 ) @@ -5566,7 +5566,7 @@ physical_plan query IIII select c1, c2, rank1, rank2 from ( - select c1, c2, rank() over (partition by c1 order by c2) as rank1, + select c1, c2, rank() over (partition by c1 order by c2) as rank1, rank() over (partition by c2, c1 order by c1) as rank2 from t1 ) @@ -5984,8 +5984,8 @@ physical_plan 01)ProjectionExec: expr=[c1@2 as c1, c2@3 as c2, sum(test.c2) FILTER (WHERE test.c2 >= Int64(2)) ORDER BY [test.c1 ASC NULLS LAST, test.c2 ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@4 as sum1, sum(test.c2) FILTER (WHERE test.c2 >= Int64(2) AND test.c2 < Int64(4) AND test.c1 > Int64(0)) ORDER BY [test.c1 ASC NULLS LAST, test.c2 ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@5 as sum2, count(test.c2) FILTER (WHERE test.c2 >= Int64(2)) ORDER BY [test.c1 ASC NULLS LAST, test.c2 ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@6 as count1, array_agg(test.c2) FILTER (WHERE test.c2 >= Int64(2)) ORDER BY [test.c1 ASC NULLS LAST, test.c2 ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@7 as array_agg1, array_agg(test.c2) FILTER (WHERE test.c2 >= Int64(2) AND test.c2 < Int64(4) AND test.c1 > Int64(0)) ORDER BY [test.c1 ASC NULLS LAST, test.c2 ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@8 as array_agg2] 02)--GlobalLimitExec: skip=0, fetch=5 03)----BoundedWindowAggExec: wdw=[sum(test.c2) FILTER (WHERE test.c2 >= Int64(2)) ORDER BY [test.c1 ASC NULLS LAST, test.c2 ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "sum(test.c2) FILTER (WHERE test.c2 >= Int64(2)) ORDER BY [test.c1 ASC NULLS LAST, test.c2 ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, sum(test.c2) FILTER (WHERE test.c2 >= Int64(2) AND test.c2 < Int64(4) AND test.c1 > Int64(0)) ORDER BY [test.c1 ASC NULLS LAST, test.c2 ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "sum(test.c2) FILTER (WHERE test.c2 >= Int64(2) AND test.c2 < Int64(4) AND test.c1 > Int64(0)) ORDER BY [test.c1 ASC NULLS LAST, test.c2 ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, count(test.c2) FILTER (WHERE test.c2 >= Int64(2)) ORDER BY [test.c1 ASC NULLS LAST, test.c2 ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "count(test.c2) FILTER (WHERE test.c2 >= Int64(2)) ORDER BY [test.c1 ASC NULLS LAST, test.c2 ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, array_agg(test.c2) FILTER (WHERE test.c2 >= Int64(2)) ORDER BY [test.c1 ASC NULLS LAST, test.c2 ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "array_agg(test.c2) FILTER (WHERE test.c2 >= Int64(2)) ORDER BY [test.c1 ASC NULLS LAST, test.c2 ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: List(Field { name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, array_agg(test.c2) FILTER (WHERE test.c2 >= Int64(2) AND test.c2 < Int64(4) AND test.c1 > Int64(0)) ORDER BY [test.c1 ASC NULLS LAST, test.c2 ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "array_agg(test.c2) FILTER (WHERE test.c2 >= Int64(2) AND test.c2 < Int64(4) AND test.c1 > Int64(0)) ORDER BY [test.c1 ASC NULLS LAST, test.c2 ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: List(Field { name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] -04)------SortPreservingMergeExec: [c1@2 ASC NULLS LAST, c2@3 ASC NULLS LAST], fetch=6 -05)--------SortExec: TopK(fetch=6), expr=[c1@2 ASC NULLS LAST, c2@3 ASC NULLS LAST], preserve_partitioning=[true] +04)------SortPreservingMergeExec: [c1@2 ASC NULLS LAST, c2@3 ASC NULLS LAST], fetch=5 +05)--------SortExec: TopK(fetch=5), expr=[c1@2 ASC NULLS LAST, c2@3 ASC NULLS LAST], preserve_partitioning=[true] 06)----------ProjectionExec: expr=[__common_expr_3@0 as __common_expr_1, __common_expr_3@0 AND c2@2 < 4 AND c1@1 > 0 as __common_expr_2, c1@1 as c1, c2@2 as c2] 07)------------ProjectionExec: expr=[c2@1 >= 2 as __common_expr_3, c1@0 as c1, c2@1 as c2] 08)--------------DataSourceExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/core/tests/data/partitioned_csv/partition-0.csv], [WORKSPACE_ROOT/datafusion/core/tests/data/partitioned_csv/partition-1.csv], [WORKSPACE_ROOT/datafusion/core/tests/data/partitioned_csv/partition-2.csv], [WORKSPACE_ROOT/datafusion/core/tests/data/partitioned_csv/partition-3.csv]]}, projection=[c1, c2], file_type=csv, has_header=false From 67b7bbcdc202f7d18864b00c2e001337e6bf871e Mon Sep 17 00:00:00 2001 From: Brent Gardner Date: Sat, 11 Oct 2025 11:26:42 -0600 Subject: [PATCH 04/30] progress --- Cargo.lock | 1 + datafusion/functions-window/src/lead_lag.rs | 6 +- datafusion/physical-optimizer/Cargo.toml | 1 + .../src/limit_pushdown_past_window.rs | 73 ++++++++++++++++++- 4 files changed, 77 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 05c70d83b8d6..b1275bb7f947 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2452,6 +2452,7 @@ dependencies = [ "datafusion-execution", "datafusion-expr", "datafusion-expr-common", + "datafusion-functions-window", "datafusion-physical-expr", "datafusion-physical-expr-common", "datafusion-physical-plan", diff --git a/datafusion/functions-window/src/lead_lag.rs b/datafusion/functions-window/src/lead_lag.rs index c4cbc268cdec..e2bba3d7cd3c 100644 --- a/datafusion/functions-window/src/lead_lag.rs +++ b/datafusion/functions-window/src/lead_lag.rs @@ -95,7 +95,7 @@ pub fn lead( } #[derive(Debug, PartialEq, Eq, Hash)] -enum WindowShiftKind { +pub enum WindowShiftKind { Lag, Lead, } @@ -148,6 +148,10 @@ impl WindowShift { pub fn lead() -> Self { Self::new(WindowShiftKind::Lead) } + + pub fn kind(&self) -> &WindowShiftKind { + &self.kind + } } static LAG_DOCUMENTATION: LazyLock = LazyLock::new(|| { diff --git a/datafusion/physical-optimizer/Cargo.toml b/datafusion/physical-optimizer/Cargo.toml index 15466cd86bb0..0f765ad9b9f3 100644 --- a/datafusion/physical-optimizer/Cargo.toml +++ b/datafusion/physical-optimizer/Cargo.toml @@ -43,6 +43,7 @@ datafusion-common = { workspace = true } datafusion-execution = { workspace = true } datafusion-expr = { workspace = true } datafusion-expr-common = { workspace = true, default-features = true } +datafusion-functions-window = { workspace = true } datafusion-physical-expr = { workspace = true } datafusion-physical-expr-common = { workspace = true } datafusion-physical-plan = { workspace = true } diff --git a/datafusion/physical-optimizer/src/limit_pushdown_past_window.rs b/datafusion/physical-optimizer/src/limit_pushdown_past_window.rs index 2b21650597cc..7f526a0c43cc 100644 --- a/datafusion/physical-optimizer/src/limit_pushdown_past_window.rs +++ b/datafusion/physical-optimizer/src/limit_pushdown_past_window.rs @@ -19,15 +19,21 @@ use crate::PhysicalOptimizerRule; use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{Transformed, TreeNode}; use datafusion_common::ScalarValue; -use datafusion_expr::{WindowFrameBound, WindowFrameUnits}; +use datafusion_expr::{WindowFrameBound, WindowFrameUnits, WindowUDFImpl}; use datafusion_physical_plan::execution_plan::CardinalityEffect; use datafusion_physical_plan::limit::GlobalLimitExec; use datafusion_physical_plan::sorts::sort::SortExec; use datafusion_physical_plan::sorts::sort_preserving_merge::SortPreservingMergeExec; -use datafusion_physical_plan::windows::BoundedWindowAggExec; +use datafusion_physical_plan::windows::{BoundedWindowAggExec, WindowUDFExpr}; use datafusion_physical_plan::ExecutionPlan; use std::cmp; use std::sync::Arc; +use itertools::Itertools; +use datafusion_functions_window::lead_lag::WindowShift; +use datafusion_functions_window::nth_value::NthValue; +use datafusion_functions_window::row_number::RowNumber; +use datafusion_physical_expr::expressions::{Column, Literal}; +use datafusion_physical_expr::window::{PlainAggregateWindowExpr, SlidingAggregateWindowExpr, StandardWindowExpr, StandardWindowFunctionExpr, WindowExpr}; /// This rule inspects [`ExecutionPlan`]'s attempting to find fetch limits that were not pushed /// down by `LimitPushdown` because [BoundedWindowAggExec]s were "in the way". If the window is @@ -80,6 +86,9 @@ impl PhysicalOptimizerRule for LimitPushPastWindows { // grow the limit if we hit a window function if let Some(window) = node.as_any().downcast_ref::() { for expr in window.window_expr().iter() { + let Some(growth) = grow_window_amount(expr) else { + return reset(node, &mut latest_max); + }; let frame = expr.get_window_frame(); if frame.units != WindowFrameUnits::Rows { return reset(node, &mut latest_max); // expression-based limits? @@ -87,7 +96,7 @@ impl PhysicalOptimizerRule for LimitPushPastWindows { let Some(end_bound) = bound_to_usize(&frame.end_bound) else { return reset(node, &mut latest_max); }; - latest_max = cmp::max(end_bound + 1, latest_max); + latest_max = cmp::max(end_bound + growth, latest_max); } return Ok(Transformed::no(node)); } @@ -144,6 +153,64 @@ impl PhysicalOptimizerRule for LimitPushPastWindows { } } +/// Examines the `WindowExpr` and decides: +/// 1. The expression does not change the window size +/// 2. The expression grows it by X amount +/// 3. We don't know +/// +/// # Arguments +/// +/// * `expr` the expression to examine +/// +/// # Returns +/// +/// `None` if we don't know the effect on window size, else Some(amount) +fn grow_window_amount(expr: &Arc) -> Option { + println!("grow_window_amount expr={:?}", expr); + if expr.as_any().is::() + || expr.as_any().is::() { + return Some(0); // aggregates do not change window + } + let Some(swe) = expr.as_any().downcast_ref::() else { + return None; // `StandardWindowExpr` is the only remaining one in DataFusion code + }; + let swfe = swe.get_standard_func_expr(); + let Some(udf) = swfe.as_any().downcast_ref::() else { + return None; // Always `WindowUDFExpr` unless someone added extensions + }; + let fun = udf.fun().inner(); + + // no change + if fun.as_any().is::() { + return Some(0); + }; + + // get arguments + let args = udf.expressions(); + let Ok(arg) = args.iter().exactly_one() else { + return None; + }; + let Some(amount) = arg.as_any().downcast_ref::() else { + return None; + }; + + // window manipulating + if let Some(fun) = fun.as_any().downcast_ref::() { + println!("WindowShift args={:?}", amount); + return Some(0); // TODO: get amount and return it + }; + if let Some(fun) = fun.as_any().downcast_ref::() { + println!("WindowShift args={:?}", amount); + return Some(0); // TODO: get amount and return it + }; + + // explicitly can't handle: cume_dist, ntile, percent_rank + + // maybe in the future: dense_rank, rank, first_value + + None // Don't know, can't optimize +} + fn bound_to_usize(bound: &WindowFrameBound) -> Option { match bound { WindowFrameBound::Preceding(_) => Some(0), From 6891e3d6b0100ccc73ec663b354f60a6369b52c1 Mon Sep 17 00:00:00 2001 From: Brent Gardner Date: Sat, 11 Oct 2025 11:33:01 -0600 Subject: [PATCH 05/30] window.slt like master --- datafusion/sqllogictest/test_files/window.slt | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/datafusion/sqllogictest/test_files/window.slt b/datafusion/sqllogictest/test_files/window.slt index 069bb8c85559..cb2faf1cc785 100644 --- a/datafusion/sqllogictest/test_files/window.slt +++ b/datafusion/sqllogictest/test_files/window.slt @@ -1345,7 +1345,7 @@ physical_plan 02)--GlobalLimitExec: skip=0, fetch=5 03)----BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Field { name: "sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 5 PRECEDING AND 1 FOLLOWING], mode=[Sorted] 04)------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Field { name: "sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING], mode=[Sorted] -05)--------SortExec: TopK(fetch=11), expr=[c9@0 DESC], preserve_partitioning=[false] +05)--------SortExec: TopK(fetch=10), expr=[c9@0 DESC], preserve_partitioning=[false] 06)----------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c9], file_type=csv, has_header=true query III @@ -1388,7 +1388,7 @@ physical_plan 02)--GlobalLimitExec: skip=0, fetch=5 03)----BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Field { name: "sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 5 PRECEDING AND 1 FOLLOWING], mode=[Sorted] 04)------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Field { name: "sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING], mode=[Sorted] -05)--------SortExec: TopK(fetch=11), expr=[c9@0 DESC], preserve_partitioning=[false] +05)--------SortExec: TopK(fetch=10), expr=[c9@0 DESC], preserve_partitioning=[false] 06)----------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c9], file_type=csv, has_header=true # ensure limit pushdown can handle bigger preceding instead of following @@ -1466,7 +1466,7 @@ physical_plan 02)--GlobalLimitExec: skip=5, fetch=5 03)----BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Field { name: "sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 5 PRECEDING AND 1 FOLLOWING], mode=[Sorted] 04)------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Field { name: "sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING], mode=[Sorted] -05)--------SortExec: TopK(fetch=16), expr=[c9@0 DESC], preserve_partitioning=[false] +05)--------SortExec: TopK(fetch=15), expr=[c9@0 DESC], preserve_partitioning=[false] 06)----------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c9], file_type=csv, has_header=true query III @@ -1550,7 +1550,7 @@ physical_plan 01)ProjectionExec: expr=[c9@0 as c9, row_number() ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@2 as rn1, row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@1 as rn2] 02)--GlobalLimitExec: skip=0, fetch=5 03)----BoundedWindowAggExec: wdw=[row_number() ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Field { name: "row_number() ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING], mode=[Sorted] -04)------SortExec: TopK(fetch=11), expr=[c9@0 ASC NULLS LAST], preserve_partitioning=[false] +04)------SortExec: TopK(fetch=10), expr=[c9@0 ASC NULLS LAST], preserve_partitioning=[false] 05)--------BoundedWindowAggExec: wdw=[row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Field { name: "row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING], mode=[Sorted] 06)----------SortExec: expr=[c9@0 DESC], preserve_partitioning=[false] 07)------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c9], file_type=csv, has_header=true @@ -1592,7 +1592,7 @@ physical_plan 01)ProjectionExec: expr=[c9@2 as c9, sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@5 as sum1, sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c1 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@3 as sum2, row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@4 as rn2] 02)--GlobalLimitExec: skip=0, fetch=5 03)----BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Field { name: "sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING], mode=[Sorted] -04)------SortExec: TopK(fetch=11), expr=[c9@2 ASC NULLS LAST, c1@0 ASC NULLS LAST, c2@1 ASC NULLS LAST], preserve_partitioning=[false] +04)------SortExec: TopK(fetch=10), expr=[c9@2 ASC NULLS LAST, c1@0 ASC NULLS LAST, c2@1 ASC NULLS LAST], preserve_partitioning=[false] 05)--------BoundedWindowAggExec: wdw=[row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Field { name: "row_number() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING], mode=[Sorted] 06)----------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c1 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Field { name: "sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c1 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING], mode=[Sorted] 07)------------SortExec: expr=[c9@2 DESC, c1@0 DESC], preserve_partitioning=[false] @@ -1761,7 +1761,7 @@ physical_plan 02)--GlobalLimitExec: skip=0, fetch=5 03)----BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Field { name: "sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING], mode=[Sorted] 04)------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Field { name: "sum(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING], mode=[Sorted] -05)--------SortExec: TopK(fetch=11), expr=[c1@0 ASC NULLS LAST, c9@1 DESC], preserve_partitioning=[false] +05)--------SortExec: TopK(fetch=10), expr=[c1@0 ASC NULLS LAST, c9@1 DESC], preserve_partitioning=[false] 06)----------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c9], file_type=csv, has_header=true @@ -1805,7 +1805,7 @@ physical_plan 02)--GlobalLimitExec: skip=0, fetch=5 03)----BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Field { name: "sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 5 PRECEDING AND 1 FOLLOWING], mode=[Sorted] 04)------BoundedWindowAggExec: wdw=[sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Field { name: "sum(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING], mode=[Sorted] -05)--------SortExec: TopK(fetch=11), expr=[c1@0 ASC NULLS LAST, c9@1 DESC], preserve_partitioning=[false] +05)--------SortExec: TopK(fetch=10), expr=[c1@0 ASC NULLS LAST, c9@1 DESC], preserve_partitioning=[false] 06)----------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c9], file_type=csv, has_header=true query III From 81d53534a3591373e1c26d65130faa83a6053761 Mon Sep 17 00:00:00 2001 From: Brent Gardner Date: Sat, 11 Oct 2025 13:06:43 -0600 Subject: [PATCH 06/30] passing existing tests --- datafusion/expr/src/expr_fn.rs | 9 ++ datafusion/expr/src/udwf.rs | 15 ++++ datafusion/functions-window/src/cume_dist.rs | 4 + datafusion/functions-window/src/lead_lag.rs | 4 + datafusion/functions-window/src/nth_value.rs | 8 ++ datafusion/functions-window/src/ntile.rs | 4 + datafusion/functions-window/src/rank.rs | 8 ++ datafusion/functions-window/src/row_number.rs | 4 + .../src/limit_pushdown_past_window.rs | 89 +++++++++++-------- 9 files changed, 108 insertions(+), 37 deletions(-) diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs index 4d8b94ba27ff..42b4cd3a7a8d 100644 --- a/datafusion/expr/src/expr_fn.rs +++ b/datafusion/expr/src/expr_fn.rs @@ -625,6 +625,7 @@ pub fn create_udwf( return_type: Arc, volatility: Volatility, partition_evaluator_factory: PartitionEvaluatorFactory, + is_causal: bool, ) -> WindowUDF { let return_type = Arc::unwrap_or_clone(return_type); WindowUDF::from(SimpleWindowUDF::new( @@ -633,6 +634,7 @@ pub fn create_udwf( return_type, volatility, partition_evaluator_factory, + is_causal, )) } @@ -644,6 +646,7 @@ pub struct SimpleWindowUDF { signature: Signature, return_type: DataType, partition_evaluator_factory: PtrEq, + is_causal: bool, } impl Debug for SimpleWindowUDF { @@ -666,6 +669,7 @@ impl SimpleWindowUDF { return_type: DataType, volatility: Volatility, partition_evaluator_factory: PartitionEvaluatorFactory, + is_causal: bool, ) -> Self { let name = name.into(); let signature = Signature::exact([input_type].to_vec(), volatility); @@ -674,6 +678,7 @@ impl SimpleWindowUDF { signature, return_type, partition_evaluator_factory: partition_evaluator_factory.into(), + is_causal, } } } @@ -705,6 +710,10 @@ impl WindowUDFImpl for SimpleWindowUDF { true, ))) } + + fn is_causal(&self) -> bool { + self.is_causal + } } pub fn interval_year_month_lit(value: &str) -> Expr { diff --git a/datafusion/expr/src/udwf.rs b/datafusion/expr/src/udwf.rs index 402add0391e1..cce8f575741c 100644 --- a/datafusion/expr/src/udwf.rs +++ b/datafusion/expr/src/udwf.rs @@ -414,6 +414,9 @@ pub trait WindowUDFImpl: Debug + DynEq + DynHash + Send + Sync { fn documentation(&self) -> Option<&Documentation> { None } + + /// Returns true if this function only needs access to current and previous rows + fn is_causal(&self) -> bool; } pub enum ReversedUDWF { @@ -523,6 +526,10 @@ impl WindowUDFImpl for AliasedWindowUDFImpl { fn documentation(&self) -> Option<&Documentation> { self.inner.documentation() } + + fn is_causal(&self) -> bool { + self.inner.is_causal() + } } #[cfg(test)] @@ -574,6 +581,10 @@ mod test { fn field(&self, _field_args: WindowUDFFieldArgs) -> Result { unimplemented!() } + + fn is_causal(&self) -> bool { + false + } } #[derive(Debug, Clone, PartialEq, Eq, Hash)] @@ -613,6 +624,10 @@ mod test { fn field(&self, _field_args: WindowUDFFieldArgs) -> Result { unimplemented!() } + + fn is_causal(&self) -> bool { + false + } } #[test] diff --git a/datafusion/functions-window/src/cume_dist.rs b/datafusion/functions-window/src/cume_dist.rs index 9b4c682f2e36..42d00570e6cd 100644 --- a/datafusion/functions-window/src/cume_dist.rs +++ b/datafusion/functions-window/src/cume_dist.rs @@ -110,6 +110,10 @@ impl WindowUDFImpl for CumeDist { fn documentation(&self) -> Option<&Documentation> { self.doc() } + + fn is_causal(&self) -> bool { + false + } } #[derive(Debug, Default)] diff --git a/datafusion/functions-window/src/lead_lag.rs b/datafusion/functions-window/src/lead_lag.rs index e2bba3d7cd3c..047f6120ad26 100644 --- a/datafusion/functions-window/src/lead_lag.rs +++ b/datafusion/functions-window/src/lead_lag.rs @@ -303,6 +303,10 @@ impl WindowUDFImpl for WindowShift { WindowShiftKind::Lead => Some(get_lead_doc()), } } + + fn is_causal(&self) -> bool { + false // TODO: we can grow limit by N + } } /// When `lead`/`lag` is evaluated on a `NULL` expression we attempt to diff --git a/datafusion/functions-window/src/nth_value.rs b/datafusion/functions-window/src/nth_value.rs index 02beeec4ec27..61184be8be0b 100644 --- a/datafusion/functions-window/src/nth_value.rs +++ b/datafusion/functions-window/src/nth_value.rs @@ -126,6 +126,10 @@ impl NthValue { pub fn nth() -> Self { Self::new(NthValueKind::Nth) } + + pub fn kind(&self) -> &NthValueKind { + &self.kind + } } static FIRST_VALUE_DOCUMENTATION: LazyLock = LazyLock::new(|| { @@ -337,6 +341,10 @@ impl WindowUDFImpl for NthValue { NthValueKind::Nth => Some(get_nth_value_doc()), } } + + fn is_causal(&self) -> bool { + false // TODO: if argument is literal, we can max(N, limit) + } } #[derive(Debug, Clone)] diff --git a/datafusion/functions-window/src/ntile.rs b/datafusion/functions-window/src/ntile.rs index 14055b885023..94583327234e 100644 --- a/datafusion/functions-window/src/ntile.rs +++ b/datafusion/functions-window/src/ntile.rs @@ -156,6 +156,10 @@ impl WindowUDFImpl for Ntile { fn documentation(&self) -> Option<&Documentation> { self.doc() } + + fn is_causal(&self) -> bool { + false + } } #[derive(Debug)] diff --git a/datafusion/functions-window/src/rank.rs b/datafusion/functions-window/src/rank.rs index 51ec4bbdbf07..1dd1287d880c 100644 --- a/datafusion/functions-window/src/rank.rs +++ b/datafusion/functions-window/src/rank.rs @@ -240,6 +240,14 @@ impl WindowUDFImpl for Rank { RankType::Percent => Some(get_percent_rank_doc()), } } + + fn is_causal(&self) -> bool { + match self.rank_type { + RankType::Basic => true, + RankType::Dense => true, + RankType::Percent => false, + } + } } /// State for the RANK(rank) built-in window function. diff --git a/datafusion/functions-window/src/row_number.rs b/datafusion/functions-window/src/row_number.rs index 2ef490c3c3ed..affff0870f91 100644 --- a/datafusion/functions-window/src/row_number.rs +++ b/datafusion/functions-window/src/row_number.rs @@ -121,6 +121,10 @@ impl WindowUDFImpl for RowNumber { fn documentation(&self) -> Option<&Documentation> { self.doc() } + + fn is_causal(&self) -> bool { + true + } } /// State for the `row_number` built-in window function. diff --git a/datafusion/physical-optimizer/src/limit_pushdown_past_window.rs b/datafusion/physical-optimizer/src/limit_pushdown_past_window.rs index 7f526a0c43cc..aabe768251fd 100644 --- a/datafusion/physical-optimizer/src/limit_pushdown_past_window.rs +++ b/datafusion/physical-optimizer/src/limit_pushdown_past_window.rs @@ -20,6 +20,13 @@ use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{Transformed, TreeNode}; use datafusion_common::ScalarValue; use datafusion_expr::{WindowFrameBound, WindowFrameUnits, WindowUDFImpl}; +use datafusion_functions_window::lead_lag::{WindowShift, WindowShiftKind}; +use datafusion_functions_window::nth_value::{NthValue, NthValueKind}; +use datafusion_physical_expr::expressions::Literal; +use datafusion_physical_expr::window::{ + PlainAggregateWindowExpr, SlidingAggregateWindowExpr, StandardWindowExpr, + StandardWindowFunctionExpr, WindowExpr, +}; use datafusion_physical_plan::execution_plan::CardinalityEffect; use datafusion_physical_plan::limit::GlobalLimitExec; use datafusion_physical_plan::sorts::sort::SortExec; @@ -27,13 +34,8 @@ use datafusion_physical_plan::sorts::sort_preserving_merge::SortPreservingMergeE use datafusion_physical_plan::windows::{BoundedWindowAggExec, WindowUDFExpr}; use datafusion_physical_plan::ExecutionPlan; use std::cmp; +use std::cmp::max; use std::sync::Arc; -use itertools::Itertools; -use datafusion_functions_window::lead_lag::WindowShift; -use datafusion_functions_window::nth_value::NthValue; -use datafusion_functions_window::row_number::RowNumber; -use datafusion_physical_expr::expressions::{Column, Literal}; -use datafusion_physical_expr::window::{PlainAggregateWindowExpr, SlidingAggregateWindowExpr, StandardWindowExpr, StandardWindowFunctionExpr, WindowExpr}; /// This rule inspects [`ExecutionPlan`]'s attempting to find fetch limits that were not pushed /// down by `LimitPushdown` because [BoundedWindowAggExec]s were "in the way". If the window is @@ -86,7 +88,7 @@ impl PhysicalOptimizerRule for LimitPushPastWindows { // grow the limit if we hit a window function if let Some(window) = node.as_any().downcast_ref::() { for expr in window.window_expr().iter() { - let Some(growth) = grow_window_amount(expr) else { + if !grow_limit(expr, &mut latest_max) { return reset(node, &mut latest_max); }; let frame = expr.get_window_frame(); @@ -96,7 +98,7 @@ impl PhysicalOptimizerRule for LimitPushPastWindows { let Some(end_bound) = bound_to_usize(&frame.end_bound) else { return reset(node, &mut latest_max); }; - latest_max = cmp::max(end_bound + growth, latest_max); + latest_max = max(end_bound, latest_max); } return Ok(Transformed::no(node)); } @@ -164,51 +166,64 @@ impl PhysicalOptimizerRule for LimitPushPastWindows { /// /// # Returns /// -/// `None` if we don't know the effect on window size, else Some(amount) -fn grow_window_amount(expr: &Arc) -> Option { - println!("grow_window_amount expr={:?}", expr); +/// `false` if we can't optimize +fn grow_limit(expr: &Arc, limit: &mut usize) -> bool { + // White list aggregates if expr.as_any().is::() - || expr.as_any().is::() { - return Some(0); // aggregates do not change window + || expr.as_any().is::() + { + return true; // aggregates do not change window } + + // Grab the window function let Some(swe) = expr.as_any().downcast_ref::() else { - return None; // `StandardWindowExpr` is the only remaining one in DataFusion code + return false; // `StandardWindowExpr` is the only remaining one in DataFusion code }; let swfe = swe.get_standard_func_expr(); let Some(udf) = swfe.as_any().downcast_ref::() else { - return None; // Always `WindowUDFExpr` unless someone added extensions + return false; // Always `WindowUDFExpr` unless someone added extensions }; let fun = udf.fun().inner(); + if fun.is_causal() { + return true; // no effect + } - // no change - if fun.as_any().is::() { - return Some(0); - }; - - // get arguments - let args = udf.expressions(); - let Ok(arg) = args.iter().exactly_one() else { - return None; - }; - let Some(amount) = arg.as_any().downcast_ref::() else { - return None; + // Grab the relevant argument + let amount = match udf.expressions().as_slice() { + [_, expr, ..] => { + let Some(lit) = expr.as_any().downcast_ref::() else { + return false; // not statically analyzable + }; + let ScalarValue::Int64(Some(amount)) = lit.value() else { + return false; // we should only get int64 from the parser + }; + *amount + } + [_] => 1, // default value + [] => return false, // invalid arguments }; - // window manipulating + // calculate the effect on the window if let Some(fun) = fun.as_any().downcast_ref::() { - println!("WindowShift args={:?}", amount); - return Some(0); // TODO: get amount and return it + if *fun.kind() == WindowShiftKind::Lag { + return true; // lag is causal + } + *limit += amount.max(0) as usize; // lead causes us to look ahead N rows + return true; }; if let Some(fun) = fun.as_any().downcast_ref::() { - println!("WindowShift args={:?}", amount); - return Some(0); // TODO: get amount and return it + match fun.kind() { + NthValueKind::First => return true, // causal + NthValueKind::Last => return false, // requires whole window + NthValueKind::Nth => {} + } + let nth_row = (amount - 1).max(0) as usize; + *limit = max(*limit, nth_row); // i.e. 10th row, limit 5 - so make limit 10 + return true; }; - // explicitly can't handle: cume_dist, ntile, percent_rank - - // maybe in the future: dense_rank, rank, first_value - - None // Don't know, can't optimize + // We don't know about this window function, so we can't optimize + false } fn bound_to_usize(bound: &WindowFrameBound) -> Option { From 8817b6dcea6a07407cf6caf59749ac0e8c2769d6 Mon Sep 17 00:00:00 2001 From: Brent Gardner Date: Sat, 11 Oct 2025 13:55:47 -0600 Subject: [PATCH 07/30] Break out window limit tests --- datafusion/sqllogictest/test_files/window.slt | 18 ----- .../sqllogictest/test_files/window_limits.slt | 77 +++++++++++++++++++ 2 files changed, 77 insertions(+), 18 deletions(-) create mode 100644 datafusion/sqllogictest/test_files/window_limits.slt diff --git a/datafusion/sqllogictest/test_files/window.slt b/datafusion/sqllogictest/test_files/window.slt index cb2faf1cc785..fb9ad1780f6d 100644 --- a/datafusion/sqllogictest/test_files/window.slt +++ b/datafusion/sqllogictest/test_files/window.slt @@ -1409,27 +1409,9 @@ SELECT 4144173353 28472563256 20935849039 4076864659 28118515915 24997484146 -query I -SELECT LEAD(c9) OVER (ORDER BY c9 ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM aggregate_test_100 LIMIT 5 ----- -63044568 -141047417 -141680161 -145294611 -225513085 - statement ok set datafusion.optimizer.enable_window_limits = true; -query I -SELECT LEAD(c9) OVER (ORDER BY c9 ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM aggregate_test_100 LIMIT 5 ----- -63044568 -141047417 -141680161 -145294611 -225513085 - query III SELECT c9, diff --git a/datafusion/sqllogictest/test_files/window_limits.slt b/datafusion/sqllogictest/test_files/window_limits.slt new file mode 100644 index 000000000000..425c2850c99c --- /dev/null +++ b/datafusion/sqllogictest/test_files/window_limits.slt @@ -0,0 +1,77 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +statement ok +CREATE EXTERNAL TABLE aggregate_test_100 ( + c1 VARCHAR NOT NULL, + c2 TINYINT NOT NULL, + c3 SMALLINT NOT NULL, + c4 SMALLINT, + c5 INT, + c6 BIGINT NOT NULL, + c7 SMALLINT NOT NULL, + c8 INT NOT NULL, + c9 BIGINT UNSIGNED NOT NULL, + c10 VARCHAR NOT NULL, + c11 FLOAT NOT NULL, + c12 DOUBLE NOT NULL, + c13 VARCHAR NOT NULL +) +STORED AS CSV +LOCATION '../../testing/data/csv/aggregate_test_100.csv' +OPTIONS ('format.has_header' 'true'); + +statement ok +CREATE EXTERNAL TABLE null_cases( + c1 BIGINT NULL, + c2 DOUBLE NULL, + c3 BIGINT NULL +) +STORED AS CSV +LOCATION '../core/tests/data/null_cases.csv' +OPTIONS ('format.has_header' 'true'); + +### This is the same table as +### execute_with_partition with 4 partitions +statement ok +CREATE EXTERNAL TABLE test (c1 int, c2 bigint, c3 boolean) +STORED AS CSV LOCATION '../core/tests/data/partitioned_csv' +OPTIONS('format.has_header' 'false'); + +statement ok +set datafusion.optimizer.enable_window_limits = false; + +query I +SELECT LEAD(c9) OVER (ORDER BY c9 ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM aggregate_test_100 LIMIT 5 +---- +63044568 +141047417 +141680161 +145294611 +225513085 + +statement ok +set datafusion.optimizer.enable_window_limits = true; + +query I +SELECT LEAD(c9) OVER (ORDER BY c9 ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM aggregate_test_100 LIMIT 5 +---- +63044568 +141047417 +141680161 +145294611 +225513085 From b40d465a22cd297f9b8f0ac21804324f8d0d9735 Mon Sep 17 00:00:00 2001 From: Brent Gardner Date: Sat, 11 Oct 2025 15:16:34 -0600 Subject: [PATCH 08/30] LimitEffect --- datafusion-examples/examples/advanced_udwf.rs | 27 +++++-- .../user_defined_window_functions.rs | 27 ++++++- datafusion/expr/src/expr_fn.rs | 16 ++-- datafusion/expr/src/lib.rs | 2 +- datafusion/expr/src/udwf.rs | 31 +++++++- datafusion/ffi/src/udwf/mod.rs | 10 +++ datafusion/functions-window/src/cume_dist.rs | 7 +- datafusion/functions-window/src/lead_lag.rs | 35 +++++++-- datafusion/functions-window/src/nth_value.rs | 27 ++++++- datafusion/functions-window/src/ntile.rs | 8 +- datafusion/functions-window/src/rank.rs | 11 ++- datafusion/functions-window/src/row_number.rs | 10 ++- .../simplify_expressions/expr_simplifier.rs | 9 +++ .../window/standard_window_function_expr.rs | 4 +- .../src/limit_pushdown_past_window.rs | 73 ++++++------------- datafusion/physical-plan/src/windows/mod.rs | 6 +- datafusion/proto/tests/cases/mod.rs | 14 +++- .../tests/cases/roundtrip_logical_plan.rs | 17 ++++- .../sqllogictest/test_files/window_limits.slt | 2 +- 19 files changed, 246 insertions(+), 90 deletions(-) diff --git a/datafusion-examples/examples/advanced_udwf.rs b/datafusion-examples/examples/advanced_udwf.rs index 50860535b720..065f961bb852 100644 --- a/datafusion-examples/examples/advanced_udwf.rs +++ b/datafusion-examples/examples/advanced_udwf.rs @@ -15,9 +15,6 @@ // specific language governing permissions and limitations // under the License. -use datafusion::{arrow::datatypes::DataType, logical_expr::Volatility}; -use std::any::Any; - use arrow::datatypes::Field; use arrow::{ array::{ArrayRef, AsArray, Float64Array}, @@ -33,10 +30,14 @@ use datafusion::logical_expr::function::{ }; use datafusion::logical_expr::simplify::SimplifyInfo; use datafusion::logical_expr::{ - Expr, PartitionEvaluator, Signature, WindowFrame, WindowFunctionDefinition, - WindowUDF, WindowUDFImpl, + Expr, LimitEffect, PartitionEvaluator, Signature, WindowFrame, + WindowFunctionDefinition, WindowUDF, WindowUDFImpl, }; +use datafusion::physical_expr::PhysicalExpr; use datafusion::prelude::*; +use datafusion::{arrow::datatypes::DataType, logical_expr::Volatility}; +use std::any::Any; +use std::sync::Arc; /// This example shows how to use the full WindowUDFImpl API to implement a user /// defined window function. As in the `simple_udwf.rs` example, this struct implements @@ -91,6 +92,14 @@ impl WindowUDFImpl for SmoothItUdf { fn field(&self, field_args: WindowUDFFieldArgs) -> Result { Ok(Field::new(field_args.name(), DataType::Float64, true).into()) } + + fn is_causal(&self) -> bool { + false + } + + fn limit_effect(&self, _args: &[Arc]) -> LimitEffect { + LimitEffect::Unknown + } } /// This implements the lowest level evaluation for a window function @@ -211,6 +220,14 @@ impl WindowUDFImpl for SimplifySmoothItUdf { fn field(&self, field_args: WindowUDFFieldArgs) -> Result { Ok(Field::new(field_args.name(), DataType::Float64, true).into()) } + + fn is_causal(&self) -> bool { + false + } + + fn limit_effect(&self, _args: &[Arc]) -> LimitEffect { + LimitEffect::Unknown + } } // create local execution context with `cars.csv` registered as a table named `cars` diff --git a/datafusion/core/tests/user_defined/user_defined_window_functions.rs b/datafusion/core/tests/user_defined/user_defined_window_functions.rs index 8871e3af275f..c9be847bda72 100644 --- a/datafusion/core/tests/user_defined/user_defined_window_functions.rs +++ b/datafusion/core/tests/user_defined/user_defined_window_functions.rs @@ -30,7 +30,8 @@ use datafusion::prelude::SessionContext; use datafusion_common::exec_datafusion_err; use datafusion_expr::ptr_eq::PtrEq; use datafusion_expr::{ - PartitionEvaluator, Signature, TypeSignature, Volatility, WindowUDF, WindowUDFImpl, + LimitEffect, PartitionEvaluator, Signature, TypeSignature, Volatility, WindowUDF, + WindowUDFImpl, }; use datafusion_functions_window_common::partition::PartitionEvaluatorArgs; use datafusion_functions_window_common::{ @@ -572,6 +573,14 @@ impl OddCounter { fn field(&self, field_args: WindowUDFFieldArgs) -> Result { Ok(Field::new(field_args.name(), DataType::Int64, true).into()) } + + fn is_causal(&self) -> bool { + false + } + + fn limit_effect(&self, _args: &[Arc]) -> LimitEffect { + LimitEffect::Unknown + } } ctx.register_udwf(WindowUDF::from(SimpleWindowUDF::new(test_state))) @@ -691,6 +700,14 @@ impl WindowUDFImpl for VariadicWindowUDF { fn field(&self, _: WindowUDFFieldArgs) -> Result { unimplemented!("unnecessary for testing"); } + + fn is_causal(&self) -> bool { + false + } + + fn limit_effect(&self, _args: &[Arc]) -> LimitEffect { + LimitEffect::Unknown + } } #[test] @@ -844,6 +861,14 @@ impl WindowUDFImpl for MetadataBasedWindowUdf { .with_metadata(self.metadata.clone()) .into()) } + + fn is_causal(&self) -> bool { + false + } + + fn limit_effect(&self, _args: &[Arc]) -> LimitEffect { + LimitEffect::Unknown + } } #[derive(Debug)] diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs index 42b4cd3a7a8d..e095f8d4b19a 100644 --- a/datafusion/expr/src/expr_fn.rs +++ b/datafusion/expr/src/expr_fn.rs @@ -29,8 +29,8 @@ use crate::ptr_eq::PtrEq; use crate::select_expr::SelectExpr; use crate::{ conditional_expressions::CaseBuilder, expr::Sort, logical_plan::Subquery, - AggregateUDF, Expr, LogicalPlan, Operator, PartitionEvaluator, ScalarFunctionArgs, - ScalarFunctionImplementation, ScalarUDF, Signature, Volatility, + AggregateUDF, Expr, LimitEffect, LogicalPlan, Operator, PartitionEvaluator, + ScalarFunctionArgs, ScalarFunctionImplementation, ScalarUDF, Signature, Volatility, }; use crate::{ AggregateUDFImpl, ColumnarValue, ScalarUDFImpl, WindowFrame, WindowUDF, WindowUDFImpl, @@ -42,6 +42,7 @@ use arrow::datatypes::{DataType, Field, FieldRef}; use datafusion_common::{plan_err, Column, Result, ScalarValue, Spans, TableReference}; use datafusion_functions_window_common::field::WindowUDFFieldArgs; use datafusion_functions_window_common::partition::PartitionEvaluatorArgs; +use datafusion_physical_expr_common::physical_expr::PhysicalExpr; use std::any::Any; use std::collections::HashMap; use std::fmt::Debug; @@ -625,7 +626,6 @@ pub fn create_udwf( return_type: Arc, volatility: Volatility, partition_evaluator_factory: PartitionEvaluatorFactory, - is_causal: bool, ) -> WindowUDF { let return_type = Arc::unwrap_or_clone(return_type); WindowUDF::from(SimpleWindowUDF::new( @@ -634,7 +634,6 @@ pub fn create_udwf( return_type, volatility, partition_evaluator_factory, - is_causal, )) } @@ -646,7 +645,6 @@ pub struct SimpleWindowUDF { signature: Signature, return_type: DataType, partition_evaluator_factory: PtrEq, - is_causal: bool, } impl Debug for SimpleWindowUDF { @@ -669,7 +667,6 @@ impl SimpleWindowUDF { return_type: DataType, volatility: Volatility, partition_evaluator_factory: PartitionEvaluatorFactory, - is_causal: bool, ) -> Self { let name = name.into(); let signature = Signature::exact([input_type].to_vec(), volatility); @@ -678,7 +675,6 @@ impl SimpleWindowUDF { signature, return_type, partition_evaluator_factory: partition_evaluator_factory.into(), - is_causal, } } } @@ -712,7 +708,11 @@ impl WindowUDFImpl for SimpleWindowUDF { } fn is_causal(&self) -> bool { - self.is_causal + false + } + + fn limit_effect(&self, _args: &[Arc]) -> LimitEffect { + LimitEffect::Unknown } } diff --git a/datafusion/expr/src/lib.rs b/datafusion/expr/src/lib.rs index 1c9734a89bd3..346d373ff5b4 100644 --- a/datafusion/expr/src/lib.rs +++ b/datafusion/expr/src/lib.rs @@ -117,7 +117,7 @@ pub use udaf::{ ReversedUDAF, SetMonotonicity, StatisticsArgs, }; pub use udf::{ReturnFieldArgs, ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl}; -pub use udwf::{ReversedUDWF, WindowUDF, WindowUDFImpl}; +pub use udwf::{LimitEffect, ReversedUDWF, WindowUDF, WindowUDFImpl}; pub use window_frame::{WindowFrame, WindowFrameBound, WindowFrameUnits}; #[cfg(test)] diff --git a/datafusion/expr/src/udwf.rs b/datafusion/expr/src/udwf.rs index cce8f575741c..7f692182b0a1 100644 --- a/datafusion/expr/src/udwf.rs +++ b/datafusion/expr/src/udwf.rs @@ -417,6 +417,21 @@ pub trait WindowUDFImpl: Debug + DynEq + DynHash + Send + Sync { /// Returns true if this function only needs access to current and previous rows fn is_causal(&self) -> bool; + + /// If not causal, returns the effect this function will have on the window + fn limit_effect(&self, args: &[Arc]) -> LimitEffect; +} + +/// the effect this function will have on the window +pub enum LimitEffect { + /// Does not affect the limit (i.e. this is causal) + None, + /// Either undeclared, or dynamic (only evaluatable at run time) + Unknown, + /// Grow the limit by N rows + Relative(usize), + /// Limit needs to be at least N rows + Absolute(usize), } pub enum ReversedUDWF { @@ -530,19 +545,25 @@ impl WindowUDFImpl for AliasedWindowUDFImpl { fn is_causal(&self) -> bool { self.inner.is_causal() } + + fn limit_effect(&self, args: &[Arc]) -> LimitEffect { + self.inner.limit_effect(args) + } } #[cfg(test)] mod test { - use crate::{PartitionEvaluator, WindowUDF, WindowUDFImpl}; + use crate::{LimitEffect, PartitionEvaluator, WindowUDF, WindowUDFImpl}; use arrow::datatypes::{DataType, FieldRef}; use datafusion_common::Result; use datafusion_expr_common::signature::{Signature, Volatility}; use datafusion_functions_window_common::field::WindowUDFFieldArgs; use datafusion_functions_window_common::partition::PartitionEvaluatorArgs; + use datafusion_physical_expr_common::physical_expr::PhysicalExpr; use std::any::Any; use std::cmp::Ordering; use std::hash::{DefaultHasher, Hash, Hasher}; + use std::sync::Arc; #[derive(Debug, Clone, PartialEq, Eq, Hash)] struct AWindowUDF { @@ -585,6 +606,10 @@ mod test { fn is_causal(&self) -> bool { false } + + fn limit_effect(&self, _args: &[Arc]) -> LimitEffect { + LimitEffect::Unknown + } } #[derive(Debug, Clone, PartialEq, Eq, Hash)] @@ -628,6 +653,10 @@ mod test { fn is_causal(&self) -> bool { false } + + fn limit_effect(&self, _args: &[Arc]) -> LimitEffect { + LimitEffect::Unknown + } } #[test] diff --git a/datafusion/ffi/src/udwf/mod.rs b/datafusion/ffi/src/udwf/mod.rs index 2bede01bb69f..97015baff196 100644 --- a/datafusion/ffi/src/udwf/mod.rs +++ b/datafusion/ffi/src/udwf/mod.rs @@ -25,6 +25,8 @@ use arrow::{ datatypes::{DataType, SchemaRef}, }; use arrow_schema::{Field, FieldRef}; +use datafusion::logical_expr::LimitEffect; +use datafusion::physical_expr::PhysicalExpr; use datafusion::{ error::DataFusionError, logical_expr::{ @@ -349,6 +351,14 @@ impl WindowUDFImpl for ForeignWindowUDF { let options: Option<&FFI_SortOptions> = self.udf.sort_options.as_ref().into(); options.map(|s| s.into()) } + + fn is_causal(&self) -> bool { + false + } + + fn limit_effect(&self, _args: &[Arc]) -> LimitEffect { + LimitEffect::Unknown + } } #[repr(C)] diff --git a/datafusion/functions-window/src/cume_dist.rs b/datafusion/functions-window/src/cume_dist.rs index 42d00570e6cd..377c2194a884 100644 --- a/datafusion/functions-window/src/cume_dist.rs +++ b/datafusion/functions-window/src/cume_dist.rs @@ -23,11 +23,12 @@ use datafusion_common::arrow::datatypes::DataType; use datafusion_common::arrow::datatypes::Field; use datafusion_common::Result; use datafusion_expr::{ - Documentation, PartitionEvaluator, Signature, Volatility, WindowUDFImpl, + Documentation, LimitEffect, PartitionEvaluator, Signature, Volatility, WindowUDFImpl, }; use datafusion_functions_window_common::field; use datafusion_functions_window_common::partition::PartitionEvaluatorArgs; use datafusion_macros::user_doc; +use datafusion_physical_expr_common::physical_expr::PhysicalExpr; use field::WindowUDFFieldArgs; use std::any::Any; use std::fmt::Debug; @@ -114,6 +115,10 @@ impl WindowUDFImpl for CumeDist { fn is_causal(&self) -> bool { false } + + fn limit_effect(&self, _args: &[Arc]) -> LimitEffect { + LimitEffect::Unknown + } } #[derive(Debug, Default)] diff --git a/datafusion/functions-window/src/lead_lag.rs b/datafusion/functions-window/src/lead_lag.rs index 047f6120ad26..8ef866c1699a 100644 --- a/datafusion/functions-window/src/lead_lag.rs +++ b/datafusion/functions-window/src/lead_lag.rs @@ -25,12 +25,13 @@ use datafusion_common::arrow::datatypes::Field; use datafusion_common::{arrow_datafusion_err, DataFusionError, Result, ScalarValue}; use datafusion_doc::window_doc_sections::DOC_SECTION_ANALYTICAL; use datafusion_expr::{ - Documentation, Literal, PartitionEvaluator, ReversedUDWF, Signature, TypeSignature, - Volatility, WindowUDFImpl, + Documentation, LimitEffect, Literal, PartitionEvaluator, ReversedUDWF, Signature, + TypeSignature, Volatility, WindowUDFImpl, }; use datafusion_functions_window_common::expr::ExpressionArgs; use datafusion_functions_window_common::field::WindowUDFFieldArgs; use datafusion_functions_window_common::partition::PartitionEvaluatorArgs; +use datafusion_physical_expr::expressions; use datafusion_physical_expr_common::physical_expr::PhysicalExpr; use std::any::Any; use std::cmp::min; @@ -305,7 +306,29 @@ impl WindowUDFImpl for WindowShift { } fn is_causal(&self) -> bool { - false // TODO: we can grow limit by N + match self.kind { + WindowShiftKind::Lag => true, + WindowShiftKind::Lead => false, + } + } + + fn limit_effect(&self, args: &[Arc]) -> LimitEffect { + if self.kind == WindowShiftKind::Lag { + return LimitEffect::None; + } + match args { + [_, expr, ..] => { + let Some(lit) = expr.as_any().downcast_ref::() + else { + return LimitEffect::Unknown; + }; + let ScalarValue::Int64(Some(amount)) = lit.value() else { + return LimitEffect::Unknown; // we should only get int64 from the parser + }; + LimitEffect::Relative((*amount).max(0) as usize) + } + _ => LimitEffect::Unknown, // invalid arguments + } } } @@ -338,10 +361,8 @@ fn parse_expr( let default_value = get_scalar_value_from_args(input_exprs, 2)?; default_value.map_or(Ok(expr), |value| { - ScalarValue::try_from(&value.data_type()).map(|v| { - Arc::new(datafusion_physical_expr::expressions::Literal::new(v)) - as Arc - }) + ScalarValue::try_from(&value.data_type()) + .map(|v| Arc::new(expressions::Literal::new(v)) as Arc) }) } diff --git a/datafusion/functions-window/src/nth_value.rs b/datafusion/functions-window/src/nth_value.rs index 61184be8be0b..47ebc44f3925 100644 --- a/datafusion/functions-window/src/nth_value.rs +++ b/datafusion/functions-window/src/nth_value.rs @@ -26,18 +26,20 @@ use datafusion_common::{exec_datafusion_err, exec_err, Result, ScalarValue}; use datafusion_doc::window_doc_sections::DOC_SECTION_ANALYTICAL; use datafusion_expr::window_state::WindowAggState; use datafusion_expr::{ - Documentation, Literal, PartitionEvaluator, ReversedUDWF, Signature, TypeSignature, - Volatility, WindowUDFImpl, + Documentation, LimitEffect, Literal, PartitionEvaluator, ReversedUDWF, Signature, + TypeSignature, Volatility, WindowUDFImpl, }; use datafusion_functions_window_common::field; use datafusion_functions_window_common::partition::PartitionEvaluatorArgs; +use datafusion_physical_expr::expressions; +use datafusion_physical_expr_common::physical_expr::PhysicalExpr; use field::WindowUDFFieldArgs; use std::any::Any; use std::cmp::Ordering; use std::fmt::Debug; use std::hash::Hash; use std::ops::Range; -use std::sync::LazyLock; +use std::sync::{Arc, LazyLock}; get_or_init_udwf!( First, @@ -343,7 +345,24 @@ impl WindowUDFImpl for NthValue { } fn is_causal(&self) -> bool { - false // TODO: if argument is literal, we can max(N, limit) + false + } + + fn limit_effect(&self, args: &[Arc]) -> LimitEffect { + match args { + [_, expr, ..] => { + let Some(lit) = expr.as_any().downcast_ref::() + else { + return LimitEffect::Unknown; + }; + let ScalarValue::Int64(Some(amount)) = lit.value() else { + return LimitEffect::Unknown; // we should only get int64 from the parser + }; + let nth_row = (amount - 1).max(0) as usize; + LimitEffect::Absolute(nth_row) + } + _ => LimitEffect::Unknown, // invalid arguments + } } } diff --git a/datafusion/functions-window/src/ntile.rs b/datafusion/functions-window/src/ntile.rs index 94583327234e..add913faaa3d 100644 --- a/datafusion/functions-window/src/ntile.rs +++ b/datafusion/functions-window/src/ntile.rs @@ -25,11 +25,13 @@ use datafusion_common::arrow::array::{ArrayRef, UInt64Array}; use datafusion_common::arrow::datatypes::{DataType, Field}; use datafusion_common::{exec_datafusion_err, exec_err, Result}; use datafusion_expr::{ - Documentation, Expr, PartitionEvaluator, Signature, Volatility, WindowUDFImpl, + Documentation, Expr, LimitEffect, PartitionEvaluator, Signature, Volatility, + WindowUDFImpl, }; use datafusion_functions_window_common::field; use datafusion_functions_window_common::partition::PartitionEvaluatorArgs; use datafusion_macros::user_doc; +use datafusion_physical_expr_common::physical_expr::PhysicalExpr; use field::WindowUDFFieldArgs; use std::any::Any; use std::fmt::Debug; @@ -160,6 +162,10 @@ impl WindowUDFImpl for Ntile { fn is_causal(&self) -> bool { false } + + fn limit_effect(&self, _args: &[Arc]) -> LimitEffect { + LimitEffect::Unknown + } } #[derive(Debug)] diff --git a/datafusion/functions-window/src/rank.rs b/datafusion/functions-window/src/rank.rs index 1dd1287d880c..6d0fcd0a0cdc 100644 --- a/datafusion/functions-window/src/rank.rs +++ b/datafusion/functions-window/src/rank.rs @@ -29,10 +29,11 @@ use datafusion_common::utils::get_row_at_idx; use datafusion_common::{exec_err, Result, ScalarValue}; use datafusion_doc::window_doc_sections::DOC_SECTION_RANKING; use datafusion_expr::{ - Documentation, PartitionEvaluator, Signature, Volatility, WindowUDFImpl, + Documentation, LimitEffect, PartitionEvaluator, Signature, Volatility, WindowUDFImpl, }; use datafusion_functions_window_common::field; use datafusion_functions_window_common::partition::PartitionEvaluatorArgs; +use datafusion_physical_expr_common::physical_expr::PhysicalExpr; use field::WindowUDFFieldArgs; use std::any::Any; use std::fmt::Debug; @@ -248,6 +249,14 @@ impl WindowUDFImpl for Rank { RankType::Percent => false, } } + + fn limit_effect(&self, _args: &[Arc]) -> LimitEffect { + match self.rank_type { + RankType::Basic => LimitEffect::None, + RankType::Dense => LimitEffect::None, + RankType::Percent => LimitEffect::Unknown, + } + } } /// State for the RANK(rank) built-in window function. diff --git a/datafusion/functions-window/src/row_number.rs b/datafusion/functions-window/src/row_number.rs index affff0870f91..8a6306b87a78 100644 --- a/datafusion/functions-window/src/row_number.rs +++ b/datafusion/functions-window/src/row_number.rs @@ -25,15 +25,17 @@ use datafusion_common::arrow::datatypes::DataType; use datafusion_common::arrow::datatypes::Field; use datafusion_common::{Result, ScalarValue}; use datafusion_expr::{ - Documentation, PartitionEvaluator, Signature, Volatility, WindowUDFImpl, + Documentation, LimitEffect, PartitionEvaluator, Signature, Volatility, WindowUDFImpl, }; use datafusion_functions_window_common::field; use datafusion_functions_window_common::partition::PartitionEvaluatorArgs; use datafusion_macros::user_doc; +use datafusion_physical_expr_common::physical_expr::PhysicalExpr; use field::WindowUDFFieldArgs; use std::any::Any; use std::fmt::Debug; use std::ops::Range; +use std::sync::Arc; define_udwf_and_expr!( RowNumber, @@ -125,6 +127,10 @@ impl WindowUDFImpl for RowNumber { fn is_causal(&self) -> bool { true } + + fn limit_effect(&self, _args: &[Arc]) -> LimitEffect { + LimitEffect::None + } } /// State for the `row_number` built-in window function. @@ -144,7 +150,7 @@ impl PartitionEvaluator for NumRowsEvaluator { _values: &[ArrayRef], num_rows: usize, ) -> Result { - Ok(std::sync::Arc::new(UInt64Array::from_iter_values( + Ok(Arc::new(UInt64Array::from_iter_values( 1..(num_rows as u64) + 1, ))) } diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index 4710ece24f91..cf849464fff3 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -2181,6 +2181,7 @@ mod tests { }; use datafusion_functions_window_common::field::WindowUDFFieldArgs; use datafusion_functions_window_common::partition::PartitionEvaluatorArgs; + use datafusion_physical_expr::PhysicalExpr; use std::hash::Hash; use std::sync::LazyLock; use std::{ @@ -4865,6 +4866,14 @@ mod tests { fn field(&self, _field_args: WindowUDFFieldArgs) -> Result { unimplemented!("not needed for tests") } + + fn is_causal(&self) -> bool { + false + } + + fn limit_effect(&self, _args: &[Arc]) -> LimitEffect { + LimitEffect::Unknown + } } #[derive(Debug, PartialEq, Eq, Hash)] struct VolatileUdf { diff --git a/datafusion/physical-expr/src/window/standard_window_function_expr.rs b/datafusion/physical-expr/src/window/standard_window_function_expr.rs index 871f735e9a96..ca7c3a4db3d4 100644 --- a/datafusion/physical-expr/src/window/standard_window_function_expr.rs +++ b/datafusion/physical-expr/src/window/standard_window_function_expr.rs @@ -21,7 +21,7 @@ use arrow::array::ArrayRef; use arrow::datatypes::{FieldRef, SchemaRef}; use arrow::record_batch::RecordBatch; use datafusion_common::Result; -use datafusion_expr::PartitionEvaluator; +use datafusion_expr::{LimitEffect, PartitionEvaluator}; use std::any::Any; use std::sync::Arc; @@ -90,4 +90,6 @@ pub trait StandardWindowFunctionExpr: Send + Sync + std::fmt::Debug { fn get_result_ordering(&self, _schema: &SchemaRef) -> Option { None } + + fn limit_effect(&self) -> LimitEffect; } diff --git a/datafusion/physical-optimizer/src/limit_pushdown_past_window.rs b/datafusion/physical-optimizer/src/limit_pushdown_past_window.rs index aabe768251fd..b086c26a4d58 100644 --- a/datafusion/physical-optimizer/src/limit_pushdown_past_window.rs +++ b/datafusion/physical-optimizer/src/limit_pushdown_past_window.rs @@ -19,10 +19,7 @@ use crate::PhysicalOptimizerRule; use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{Transformed, TreeNode}; use datafusion_common::ScalarValue; -use datafusion_expr::{WindowFrameBound, WindowFrameUnits, WindowUDFImpl}; -use datafusion_functions_window::lead_lag::{WindowShift, WindowShiftKind}; -use datafusion_functions_window::nth_value::{NthValue, NthValueKind}; -use datafusion_physical_expr::expressions::Literal; +use datafusion_expr::{LimitEffect, WindowFrameBound, WindowFrameUnits}; use datafusion_physical_expr::window::{ PlainAggregateWindowExpr, SlidingAggregateWindowExpr, StandardWindowExpr, StandardWindowFunctionExpr, WindowExpr, @@ -87,10 +84,18 @@ impl PhysicalOptimizerRule for LimitPushPastWindows { // grow the limit if we hit a window function if let Some(window) = node.as_any().downcast_ref::() { + let mut max_rel = 0; + let mut max_abs = 0; for expr in window.window_expr().iter() { - if !grow_limit(expr, &mut latest_max) { - return reset(node, &mut latest_max); - }; + // grow based on function requirements + match grow_limit(expr) { + LimitEffect::None => {} + LimitEffect::Unknown => return reset(node, &mut latest_max), + LimitEffect::Relative(rel) => max_rel = max_rel.max(rel), + LimitEffect::Absolute(abs) => max_abs = max_abs.max(abs), + } + + // grow based on frames let frame = expr.get_window_frame(); if frame.units != WindowFrameUnits::Rows { return reset(node, &mut latest_max); // expression-based limits? @@ -100,6 +105,10 @@ impl PhysicalOptimizerRule for LimitPushPastWindows { }; latest_max = max(end_bound, latest_max); } + + // finish grow + latest_max = latest_max.max(latest_max + max_rel); + latest_max = latest_max.max(max_abs); return Ok(Transformed::no(node)); } @@ -167,63 +176,29 @@ impl PhysicalOptimizerRule for LimitPushPastWindows { /// # Returns /// /// `false` if we can't optimize -fn grow_limit(expr: &Arc, limit: &mut usize) -> bool { +fn grow_limit(expr: &Arc) -> LimitEffect { // White list aggregates if expr.as_any().is::() || expr.as_any().is::() { - return true; // aggregates do not change window + return LimitEffect::None; } // Grab the window function let Some(swe) = expr.as_any().downcast_ref::() else { - return false; // `StandardWindowExpr` is the only remaining one in DataFusion code + return LimitEffect::Unknown; // should be only remaining type }; let swfe = swe.get_standard_func_expr(); let Some(udf) = swfe.as_any().downcast_ref::() else { - return false; // Always `WindowUDFExpr` unless someone added extensions + return LimitEffect::Unknown; // should be only remaining type }; + + // Return it's effect let fun = udf.fun().inner(); if fun.is_causal() { - return true; // no effect + return LimitEffect::None; } - - // Grab the relevant argument - let amount = match udf.expressions().as_slice() { - [_, expr, ..] => { - let Some(lit) = expr.as_any().downcast_ref::() else { - return false; // not statically analyzable - }; - let ScalarValue::Int64(Some(amount)) = lit.value() else { - return false; // we should only get int64 from the parser - }; - *amount - } - [_] => 1, // default value - [] => return false, // invalid arguments - }; - - // calculate the effect on the window - if let Some(fun) = fun.as_any().downcast_ref::() { - if *fun.kind() == WindowShiftKind::Lag { - return true; // lag is causal - } - *limit += amount.max(0) as usize; // lead causes us to look ahead N rows - return true; - }; - if let Some(fun) = fun.as_any().downcast_ref::() { - match fun.kind() { - NthValueKind::First => return true, // causal - NthValueKind::Last => return false, // requires whole window - NthValueKind::Nth => {} - } - let nth_row = (amount - 1).max(0) as usize; - *limit = max(*limit, nth_row); // i.e. 10th row, limit 5 - so make limit 10 - return true; - }; - - // We don't know about this window function, so we can't optimize - false + udf.limit_effect() } fn bound_to_usize(bound: &WindowFrameBound) -> Option { diff --git a/datafusion/physical-plan/src/windows/mod.rs b/datafusion/physical-plan/src/windows/mod.rs index 829cadf50a86..cd35325eb3d7 100644 --- a/datafusion/physical-plan/src/windows/mod.rs +++ b/datafusion/physical-plan/src/windows/mod.rs @@ -33,7 +33,7 @@ use arrow::datatypes::{Schema, SchemaRef}; use arrow_schema::{FieldRef, SortOptions}; use datafusion_common::{exec_err, Result}; use datafusion_expr::{ - PartitionEvaluator, ReversedUDWF, SetMonotonicity, WindowFrame, + LimitEffect, PartitionEvaluator, ReversedUDWF, SetMonotonicity, WindowFrame, WindowFunctionDefinition, WindowUDF, }; use datafusion_functions_window_common::expr::ExpressionArgs; @@ -281,6 +281,10 @@ impl StandardWindowFunctionExpr for WindowUDFExpr { PhysicalSortExpr { expr, options } }) } + + fn limit_effect(&self) -> LimitEffect { + self.fun.inner().limit_effect(self.args.as_slice()) + } } pub(crate) fn calc_requirements< diff --git a/datafusion/proto/tests/cases/mod.rs b/datafusion/proto/tests/cases/mod.rs index 1c52a872df13..98740917eb6a 100644 --- a/datafusion/proto/tests/cases/mod.rs +++ b/datafusion/proto/tests/cases/mod.rs @@ -17,17 +17,19 @@ use arrow::datatypes::{DataType, Field, FieldRef}; use datafusion::logical_expr::ColumnarValue; +use datafusion::physical_expr::PhysicalExpr; use datafusion_common::plan_err; use datafusion_expr::function::AccumulatorArgs; use datafusion_expr::{ - Accumulator, AggregateUDFImpl, PartitionEvaluator, ScalarFunctionArgs, ScalarUDFImpl, - Signature, Volatility, WindowUDFImpl, + Accumulator, AggregateUDFImpl, LimitEffect, PartitionEvaluator, ScalarFunctionArgs, + ScalarUDFImpl, Signature, Volatility, WindowUDFImpl, }; use datafusion_functions_window_common::field::WindowUDFFieldArgs; use datafusion_functions_window_common::partition::PartitionEvaluatorArgs; use std::any::Any; use std::fmt::Debug; use std::hash::Hash; +use std::sync::Arc; mod roundtrip_logical_plan; mod roundtrip_physical_plan; @@ -173,6 +175,14 @@ impl WindowUDFImpl for CustomUDWF { ) -> datafusion_common::Result { Ok(Field::new(field_args.name(), DataType::UInt64, false).into()) } + + fn is_causal(&self) -> bool { + false + } + + fn limit_effect(&self, _args: &[Arc]) -> LimitEffect { + LimitEffect::Unknown + } } #[derive(Debug)] diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs index 5080a33e0386..e73d1250048d 100644 --- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs @@ -62,6 +62,7 @@ use datafusion::functions_window::expr_fn::{ cume_dist, dense_rank, lag, lead, ntile, percent_rank, rank, row_number, }; use datafusion::functions_window::rank::rank_udwf; +use datafusion::physical_expr::PhysicalExpr; use datafusion::prelude::*; use datafusion::test_util::{TestTableFactory, TestTableProvider}; use datafusion_common::config::TableOptions; @@ -77,10 +78,10 @@ use datafusion_expr::expr::{ }; use datafusion_expr::logical_plan::{Extension, UserDefinedLogicalNodeCore}; use datafusion_expr::{ - Accumulator, AggregateUDF, ColumnarValue, ExprFunctionExt, ExprSchemable, Literal, - LogicalPlan, LogicalPlanBuilder, Operator, PartitionEvaluator, ScalarUDF, Signature, - TryCast, Volatility, WindowFrame, WindowFrameBound, WindowFrameUnits, - WindowFunctionDefinition, WindowUDF, WindowUDFImpl, + Accumulator, AggregateUDF, ColumnarValue, ExprFunctionExt, ExprSchemable, + LimitEffect, Literal, LogicalPlan, LogicalPlanBuilder, Operator, PartitionEvaluator, + ScalarUDF, Signature, TryCast, Volatility, WindowFrame, WindowFrameBound, + WindowFrameUnits, WindowFunctionDefinition, WindowUDF, WindowUDFImpl, }; use datafusion_functions_aggregate::average::avg_udaf; use datafusion_functions_aggregate::expr_fn::{ @@ -2544,6 +2545,14 @@ fn roundtrip_window() { ) } } + + fn is_causal(&self) -> bool { + false + } + + fn limit_effect(&self, _args: &[Arc]) -> LimitEffect { + LimitEffect::Unknown + } } fn make_partition_evaluator() -> Result> { diff --git a/datafusion/sqllogictest/test_files/window_limits.slt b/datafusion/sqllogictest/test_files/window_limits.slt index 425c2850c99c..0b4b0a3a8691 100644 --- a/datafusion/sqllogictest/test_files/window_limits.slt +++ b/datafusion/sqllogictest/test_files/window_limits.slt @@ -56,7 +56,7 @@ statement ok set datafusion.optimizer.enable_window_limits = false; query I -SELECT LEAD(c9) OVER (ORDER BY c9 ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM aggregate_test_100 LIMIT 5 +SELECT LEAD(c9, 1) OVER (ORDER BY c9 ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM aggregate_test_100 LIMIT 5 ---- 63044568 141047417 From 5d774ef2d728f9d00400df6cf5308a77562903f7 Mon Sep 17 00:00:00 2001 From: Brent Gardner Date: Sat, 11 Oct 2025 16:33:55 -0600 Subject: [PATCH 09/30] fix a bug --- datafusion/functions-window/src/lead_lag.rs | 1 + datafusion/physical-optimizer/src/limit_pushdown_past_window.rs | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/datafusion/functions-window/src/lead_lag.rs b/datafusion/functions-window/src/lead_lag.rs index 8ef866c1699a..6c10decd2606 100644 --- a/datafusion/functions-window/src/lead_lag.rs +++ b/datafusion/functions-window/src/lead_lag.rs @@ -327,6 +327,7 @@ impl WindowUDFImpl for WindowShift { }; LimitEffect::Relative((*amount).max(0) as usize) } + [_] => LimitEffect::Relative(1), // default value _ => LimitEffect::Unknown, // invalid arguments } } diff --git a/datafusion/physical-optimizer/src/limit_pushdown_past_window.rs b/datafusion/physical-optimizer/src/limit_pushdown_past_window.rs index b086c26a4d58..5bd1e0635084 100644 --- a/datafusion/physical-optimizer/src/limit_pushdown_past_window.rs +++ b/datafusion/physical-optimizer/src/limit_pushdown_past_window.rs @@ -211,5 +211,3 @@ fn bound_to_usize(bound: &WindowFrameBound) -> Option { _ => None, } } - -// tests: all branches are covered by sqllogictests From 806c2ba5b0251f87880196c786f2ae4875fcad6e Mon Sep 17 00:00:00 2001 From: Brent Gardner Date: Sun, 12 Oct 2025 06:44:45 -0600 Subject: [PATCH 10/30] repartitions --- .../src/limit_pushdown_past_window.rs | 5 + .../sqllogictest/test_files/window_limits.slt | 157 ++++++++++++++---- 2 files changed, 129 insertions(+), 33 deletions(-) diff --git a/datafusion/physical-optimizer/src/limit_pushdown_past_window.rs b/datafusion/physical-optimizer/src/limit_pushdown_past_window.rs index 5bd1e0635084..8331a9e1a374 100644 --- a/datafusion/physical-optimizer/src/limit_pushdown_past_window.rs +++ b/datafusion/physical-optimizer/src/limit_pushdown_past_window.rs @@ -81,6 +81,11 @@ impl PhysicalOptimizerRule for LimitPushPastWindows { latest_max = 0; return Ok(Transformed::no(node)); } + if let Some(limit) = node.as_any().downcast_ref::() { + latest_limit = limit.fetch(); + latest_max = 0; + return Ok(Transformed::no(node)); + } // grow the limit if we hit a window function if let Some(window) = node.as_any().downcast_ref::() { diff --git a/datafusion/sqllogictest/test_files/window_limits.slt b/datafusion/sqllogictest/test_files/window_limits.slt index 0b4b0a3a8691..1c5d248f6ccd 100644 --- a/datafusion/sqllogictest/test_files/window_limits.slt +++ b/datafusion/sqllogictest/test_files/window_limits.slt @@ -15,63 +15,154 @@ # specific language governing permissions and limitations # under the License. +# see https://datafusion.apache.org/user-guide/sql/window_functions.html#syntax for field names & examples statement ok -CREATE EXTERNAL TABLE aggregate_test_100 ( - c1 VARCHAR NOT NULL, +CREATE EXTERNAL TABLE employees ( + depname VARCHAR NOT NULL, c2 TINYINT NOT NULL, c3 SMALLINT NOT NULL, c4 SMALLINT, c5 INT, c6 BIGINT NOT NULL, c7 SMALLINT NOT NULL, - c8 INT NOT NULL, - c9 BIGINT UNSIGNED NOT NULL, + empno INT NOT NULL, + salary BIGINT UNSIGNED NOT NULL, c10 VARCHAR NOT NULL, c11 FLOAT NOT NULL, c12 DOUBLE NOT NULL, - c13 VARCHAR NOT NULL + c13 VARCHAR NOT NULL, + hire_date DATE NOT NULL, + c15 TIMESTAMP NOT NULL, ) STORED AS CSV -LOCATION '../../testing/data/csv/aggregate_test_100.csv' +LOCATION '../../testing/data/csv/aggregate_test_100_with_dates.csv' OPTIONS ('format.has_header' 'true'); +# lead defaults to 1 and should grow limit statement ok -CREATE EXTERNAL TABLE null_cases( - c1 BIGINT NULL, - c2 DOUBLE NULL, - c3 BIGINT NULL -) -STORED AS CSV -LOCATION '../core/tests/data/null_cases.csv' -OPTIONS ('format.has_header' 'true'); +set datafusion.optimizer.enable_window_limits = false; + +query I +SELECT LEAD(empno) OVER (ORDER BY empno ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM employees LIMIT 3 +---- +299 +363 +417 -### This is the same table as -### execute_with_partition with 4 partitions statement ok -CREATE EXTERNAL TABLE test (c1 int, c2 bigint, c3 boolean) -STORED AS CSV LOCATION '../core/tests/data/partitioned_csv' -OPTIONS('format.has_header' 'false'); +set datafusion.optimizer.enable_window_limits = true; +query I +SELECT LEAD(empno) OVER (ORDER BY empno ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM employees LIMIT 3 +---- +299 +363 +417 + +query TT +EXPLAIN +SELECT LEAD(empno) OVER (ORDER BY empno ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM employees LIMIT 3 +---- +logical_plan +01)Projection: lead(employees.empno) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW +02)--Limit: skip=0, fetch=3 +03)----WindowAggr: windowExpr=[[lead(employees.empno) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] +04)------TableScan: employees projection=[empno] +physical_plan +01)ProjectionExec: expr=[lead(employees.empno) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@1 as lead(employees.empno) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW] +02)--GlobalLimitExec: skip=0, fetch=3 +03)----BoundedWindowAggExec: wdw=[lead(employees.empno) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "lead(employees.empno) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] +04)------SortExec: TopK(fetch=4), expr=[empno@0 ASC NULLS LAST], preserve_partitioning=[false] +05)--------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100_with_dates.csv]]}, projection=[empno], file_type=csv, has_header=true + +# should handle partition by unoptimized statement ok set datafusion.optimizer.enable_window_limits = false; -query I -SELECT LEAD(c9, 1) OVER (ORDER BY c9 ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM aggregate_test_100 LIMIT 5 +query TIII +SELECT depname, empno, salary, SUM(salary) OVER ( + PARTITION BY depname + ORDER BY empno + ROWS BETWEEN 1 PRECEDING AND CURRENT ROW + ) AS running_sum +FROM employees +ORDER BY depname +LIMIT 5 ---- -63044568 -141047417 -141680161 -145294611 -225513085 +a 102 3276123488 3276123488 +a 363 1865307672 5141431160 +a 829 4015442341 5880750013 +a 2555 145294611 4160736952 +a 2809 754775609 900070220 +query TT +EXPLAIN +SELECT depname, empno, salary, SUM(salary) OVER ( + PARTITION BY depname + ORDER BY empno + ROWS BETWEEN 1 PRECEDING AND CURRENT ROW + ) AS running_sum +FROM employees +ORDER BY depname +LIMIT 5 +---- +logical_plan +01)Sort: employees.depname ASC NULLS LAST, fetch=5 +02)--Projection: employees.depname, employees.empno, employees.salary, sum(employees.salary) PARTITION BY [employees.depname] ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND CURRENT ROW AS running_sum +03)----WindowAggr: windowExpr=[[sum(employees.salary) PARTITION BY [employees.depname] ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND CURRENT ROW]] +04)------TableScan: employees projection=[depname, empno, salary] +physical_plan +01)SortPreservingMergeExec: [depname@0 ASC NULLS LAST], fetch=5 +02)--ProjectionExec: expr=[depname@0 as depname, empno@1 as empno, salary@2 as salary, sum(employees.salary) PARTITION BY [employees.depname] ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND CURRENT ROW@3 as running_sum] +03)----BoundedWindowAggExec: wdw=[sum(employees.salary) PARTITION BY [employees.depname] ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND CURRENT ROW: Field { name: "sum(employees.salary) PARTITION BY [employees.depname] ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] +04)------SortExec: expr=[depname@0 ASC NULLS LAST, empno@1 ASC NULLS LAST], preserve_partitioning=[true] +05)--------CoalesceBatchesExec: target_batch_size=8192 +06)----------RepartitionExec: partitioning=Hash([depname@0], 4), input_partitions=4 +07)------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +08)--------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100_with_dates.csv]]}, projection=[depname, empno, salary], file_type=csv, has_header=true + +# should handle partition by optimized statement ok set datafusion.optimizer.enable_window_limits = true; -query I -SELECT LEAD(c9) OVER (ORDER BY c9 ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM aggregate_test_100 LIMIT 5 +query TIII +SELECT depname, empno, salary, SUM(salary) OVER ( + PARTITION BY depname + ORDER BY empno + ROWS BETWEEN 1 PRECEDING AND CURRENT ROW + ) AS running_sum +FROM employees +ORDER BY depname +LIMIT 5 +---- +a 102 3276123488 3276123488 +a 363 1865307672 5141431160 +a 829 4015442341 5880750013 +a 2555 145294611 4160736952 +a 2809 754775609 900070220 + +query TT +EXPLAIN +SELECT depname, empno, salary, SUM(salary) OVER ( + PARTITION BY depname + ORDER BY empno + ROWS BETWEEN 1 PRECEDING AND CURRENT ROW + ) AS running_sum +FROM employees +ORDER BY depname +LIMIT 5 ---- -63044568 -141047417 -141680161 -145294611 -225513085 +logical_plan +01)Sort: employees.depname ASC NULLS LAST, fetch=5 +02)--Projection: employees.depname, employees.empno, employees.salary, sum(employees.salary) PARTITION BY [employees.depname] ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND CURRENT ROW AS running_sum +03)----WindowAggr: windowExpr=[[sum(employees.salary) PARTITION BY [employees.depname] ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND CURRENT ROW]] +04)------TableScan: employees projection=[depname, empno, salary] +physical_plan +01)SortPreservingMergeExec: [depname@0 ASC NULLS LAST], fetch=5 +02)--ProjectionExec: expr=[depname@0 as depname, empno@1 as empno, salary@2 as salary, sum(employees.salary) PARTITION BY [employees.depname] ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND CURRENT ROW@3 as running_sum] +03)----BoundedWindowAggExec: wdw=[sum(employees.salary) PARTITION BY [employees.depname] ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND CURRENT ROW: Field { name: "sum(employees.salary) PARTITION BY [employees.depname] ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] +04)------SortExec: TopK(fetch=5), expr=[depname@0 ASC NULLS LAST, empno@1 ASC NULLS LAST], preserve_partitioning=[true] +05)--------CoalesceBatchesExec: target_batch_size=8192 +06)----------RepartitionExec: partitioning=Hash([depname@0], 4), input_partitions=4 +07)------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +08)--------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100_with_dates.csv]]}, projection=[depname, empno, salary], file_type=csv, has_header=true From 6ffceeeee512e57ec2f2143d05bca43992a0fc89 Mon Sep 17 00:00:00 2001 From: Brent Gardner Date: Sun, 12 Oct 2025 10:52:13 -0600 Subject: [PATCH 11/30] refactor --- .../src/limit_pushdown_past_window.rs | 58 ++++++++++++------- 1 file changed, 37 insertions(+), 21 deletions(-) diff --git a/datafusion/physical-optimizer/src/limit_pushdown_past_window.rs b/datafusion/physical-optimizer/src/limit_pushdown_past_window.rs index 8331a9e1a374..3d9b85a3ae3d 100644 --- a/datafusion/physical-optimizer/src/limit_pushdown_past_window.rs +++ b/datafusion/physical-optimizer/src/limit_pushdown_past_window.rs @@ -29,10 +29,11 @@ use datafusion_physical_plan::limit::GlobalLimitExec; use datafusion_physical_plan::sorts::sort::SortExec; use datafusion_physical_plan::sorts::sort_preserving_merge::SortPreservingMergeExec; use datafusion_physical_plan::windows::{BoundedWindowAggExec, WindowUDFExpr}; -use datafusion_physical_plan::ExecutionPlan; +use datafusion_physical_plan::{ExecutionPlan, ExecutionPlanProperties}; use std::cmp; use std::cmp::max; use std::sync::Arc; +use datafusion_physical_plan::repartition::RepartitionExec; /// This rule inspects [`ExecutionPlan`]'s attempting to find fetch limits that were not pushed /// down by `LimitPushdown` because [BoundedWindowAggExec]s were "in the way". If the window is @@ -57,7 +58,7 @@ impl PhysicalOptimizerRule for LimitPushPastWindows { return Ok(original); } let mut latest_limit: Option = None; - let mut latest_max = 0; + let mut lookahead = 0; let result = original.transform_down(|node| { // helper closure to DRY out most the early return cases let mut reset = |node, @@ -72,18 +73,18 @@ impl PhysicalOptimizerRule for LimitPushPastWindows { // traversing sides of joins will require more thought if node.children().len() > 1 { - return reset(node, &mut latest_max); + return reset(node, &mut lookahead); } // grab the latest limit we see if let Some(limit) = node.as_any().downcast_ref::() { latest_limit = limit.fetch().map(|fetch| fetch + limit.skip()); - latest_max = 0; + lookahead = 0; return Ok(Transformed::no(node)); } if let Some(limit) = node.as_any().downcast_ref::() { latest_limit = limit.fetch(); - latest_max = 0; + lookahead = 0; return Ok(Transformed::no(node)); } @@ -95,7 +96,7 @@ impl PhysicalOptimizerRule for LimitPushPastWindows { // grow based on function requirements match grow_limit(expr) { LimitEffect::None => {} - LimitEffect::Unknown => return reset(node, &mut latest_max), + LimitEffect::Unknown => return reset(node, &mut lookahead), LimitEffect::Relative(rel) => max_rel = max_rel.max(rel), LimitEffect::Absolute(abs) => max_abs = max_abs.max(abs), } @@ -103,17 +104,17 @@ impl PhysicalOptimizerRule for LimitPushPastWindows { // grow based on frames let frame = expr.get_window_frame(); if frame.units != WindowFrameUnits::Rows { - return reset(node, &mut latest_max); // expression-based limits? + return reset(node, &mut lookahead); // expression-based limits? } let Some(end_bound) = bound_to_usize(&frame.end_bound) else { - return reset(node, &mut latest_max); + return reset(node, &mut lookahead); }; - latest_max = max(end_bound, latest_max); + lookahead = max(end_bound, lookahead); } // finish grow - latest_max = latest_max.max(latest_max + max_rel); - latest_max = latest_max.max(max_abs); + lookahead = lookahead.max(lookahead + max_rel); + lookahead = lookahead.max(max_abs); return Ok(Transformed::no(node)); } @@ -121,15 +122,15 @@ impl PhysicalOptimizerRule for LimitPushPastWindows { if let Some(spm) = node.as_any().downcast_ref::() { let latest = latest_limit.take(); let Some(fetch) = latest else { - latest_max = 0; + lookahead = 0; return Ok(Transformed::no(node)); }; let fetch = match spm.fetch() { - None => fetch + latest_max, - Some(existing) => cmp::min(existing, fetch + latest_max), + None => fetch + lookahead, + Some(existing) => cmp::min(existing, fetch + lookahead), }; let spm: Arc = spm.with_fetch(Some(fetch)).unwrap(); - latest_max = 0; + lookahead = 0; return Ok(Transformed::complete(spm)); } @@ -137,22 +138,37 @@ impl PhysicalOptimizerRule for LimitPushPastWindows { if let Some(sort) = node.as_any().downcast_ref::() { let latest = latest_limit.take(); let Some(fetch) = latest else { - latest_max = 0; + lookahead = 0; return Ok(Transformed::no(node)); }; let fetch = match sort.fetch() { - None => fetch + latest_max, - Some(existing) => cmp::min(existing, fetch + latest_max), + None => fetch + lookahead, + Some(existing) => cmp::min(existing, fetch + lookahead), }; let sort: Arc = Arc::new(sort.with_fetch(Some(fetch))); - latest_max = 0; + lookahead = 0; return Ok(Transformed::complete(sort)); } - // we can't push the limit past nodes that decrease row count + // nodes along the way + if !node.supports_limit_pushdown() { + // TODO: avoid combinatorial explosion of rules*nodes + // instead of white listing rules (supports push_down rule) + // use properties like CardinalityEffect, or LimitEffect + return reset(node, &mut lookahead); + } + if let Some(part) = node.as_any().downcast_ref::() { + let output = part.partitioning().partition_count(); + let input = part.input().output_partitioning().partition_count(); + if output < input { + return reset(node, &mut lookahead); + } + } match node.cardinality_effect() { + CardinalityEffect::Unknown => return reset(node, &mut lookahead), + CardinalityEffect::LowerEqual => return reset(node, &mut lookahead), CardinalityEffect::Equal => {} - _ => return reset(node, &mut latest_max), + CardinalityEffect::GreaterEqual => {} } Ok(Transformed::no(node)) From e9b1e32eac615d574765b5458a12ab6e65594326 Mon Sep 17 00:00:00 2001 From: Brent Gardner Date: Sun, 12 Oct 2025 10:59:18 -0600 Subject: [PATCH 12/30] refactor --- datafusion/expr/src/udwf.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/expr/src/udwf.rs b/datafusion/expr/src/udwf.rs index 7f692182b0a1..f6d6c61babc9 100644 --- a/datafusion/expr/src/udwf.rs +++ b/datafusion/expr/src/udwf.rs @@ -422,7 +422,7 @@ pub trait WindowUDFImpl: Debug + DynEq + DynHash + Send + Sync { fn limit_effect(&self, args: &[Arc]) -> LimitEffect; } -/// the effect this function will have on the window +/// the effect this function will have on the limit pushdown pub enum LimitEffect { /// Does not affect the limit (i.e. this is causal) None, From fb19f606baa8673e801c004c378488ce200a5fef Mon Sep 17 00:00:00 2001 From: Brent Gardner Date: Sun, 12 Oct 2025 12:37:59 -0600 Subject: [PATCH 13/30] fmt --- datafusion/functions-window/src/lead_lag.rs | 2 +- datafusion/physical-optimizer/src/limit_pushdown_past_window.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/functions-window/src/lead_lag.rs b/datafusion/functions-window/src/lead_lag.rs index 6c10decd2606..832c8d0cec4f 100644 --- a/datafusion/functions-window/src/lead_lag.rs +++ b/datafusion/functions-window/src/lead_lag.rs @@ -328,7 +328,7 @@ impl WindowUDFImpl for WindowShift { LimitEffect::Relative((*amount).max(0) as usize) } [_] => LimitEffect::Relative(1), // default value - _ => LimitEffect::Unknown, // invalid arguments + _ => LimitEffect::Unknown, // invalid arguments } } } diff --git a/datafusion/physical-optimizer/src/limit_pushdown_past_window.rs b/datafusion/physical-optimizer/src/limit_pushdown_past_window.rs index 3d9b85a3ae3d..d8af61ff9e49 100644 --- a/datafusion/physical-optimizer/src/limit_pushdown_past_window.rs +++ b/datafusion/physical-optimizer/src/limit_pushdown_past_window.rs @@ -26,6 +26,7 @@ use datafusion_physical_expr::window::{ }; use datafusion_physical_plan::execution_plan::CardinalityEffect; use datafusion_physical_plan::limit::GlobalLimitExec; +use datafusion_physical_plan::repartition::RepartitionExec; use datafusion_physical_plan::sorts::sort::SortExec; use datafusion_physical_plan::sorts::sort_preserving_merge::SortPreservingMergeExec; use datafusion_physical_plan::windows::{BoundedWindowAggExec, WindowUDFExpr}; @@ -33,7 +34,6 @@ use datafusion_physical_plan::{ExecutionPlan, ExecutionPlanProperties}; use std::cmp; use std::cmp::max; use std::sync::Arc; -use datafusion_physical_plan::repartition::RepartitionExec; /// This rule inspects [`ExecutionPlan`]'s attempting to find fetch limits that were not pushed /// down by `LimitPushdown` because [BoundedWindowAggExec]s were "in the way". If the window is From 0023a5d691fa17b32abd82ddc418589a1f8b9c63 Mon Sep 17 00:00:00 2001 From: Brent Gardner Date: Sun, 12 Oct 2025 13:10:56 -0600 Subject: [PATCH 14/30] remove casual --- datafusion-examples/examples/advanced_udwf.rs | 8 ------- .../user_defined_window_functions.rs | 12 ---------- datafusion/expr/src/expr_fn.rs | 4 ---- datafusion/expr/src/udwf.rs | 22 +++++-------------- datafusion/ffi/src/udwf/mod.rs | 4 ---- datafusion/functions-window/src/cume_dist.rs | 4 ---- datafusion/functions-window/src/lead_lag.rs | 7 ------ datafusion/functions-window/src/nth_value.rs | 4 ---- datafusion/functions-window/src/ntile.rs | 4 ---- datafusion/functions-window/src/rank.rs | 8 ------- datafusion/functions-window/src/row_number.rs | 4 ---- .../simplify_expressions/expr_simplifier.rs | 4 ---- .../src/limit_pushdown_past_window.rs | 6 +---- datafusion/proto/tests/cases/mod.rs | 4 ---- .../tests/cases/roundtrip_logical_plan.rs | 4 ---- datafusion/sqllogictest/test_files/window.slt | 8 +++---- 16 files changed, 11 insertions(+), 96 deletions(-) diff --git a/datafusion-examples/examples/advanced_udwf.rs b/datafusion-examples/examples/advanced_udwf.rs index 065f961bb852..ba4c377fd676 100644 --- a/datafusion-examples/examples/advanced_udwf.rs +++ b/datafusion-examples/examples/advanced_udwf.rs @@ -93,10 +93,6 @@ impl WindowUDFImpl for SmoothItUdf { Ok(Field::new(field_args.name(), DataType::Float64, true).into()) } - fn is_causal(&self) -> bool { - false - } - fn limit_effect(&self, _args: &[Arc]) -> LimitEffect { LimitEffect::Unknown } @@ -221,10 +217,6 @@ impl WindowUDFImpl for SimplifySmoothItUdf { Ok(Field::new(field_args.name(), DataType::Float64, true).into()) } - fn is_causal(&self) -> bool { - false - } - fn limit_effect(&self, _args: &[Arc]) -> LimitEffect { LimitEffect::Unknown } diff --git a/datafusion/core/tests/user_defined/user_defined_window_functions.rs b/datafusion/core/tests/user_defined/user_defined_window_functions.rs index c9be847bda72..33607ebc0d2c 100644 --- a/datafusion/core/tests/user_defined/user_defined_window_functions.rs +++ b/datafusion/core/tests/user_defined/user_defined_window_functions.rs @@ -574,10 +574,6 @@ impl OddCounter { Ok(Field::new(field_args.name(), DataType::Int64, true).into()) } - fn is_causal(&self) -> bool { - false - } - fn limit_effect(&self, _args: &[Arc]) -> LimitEffect { LimitEffect::Unknown } @@ -701,10 +697,6 @@ impl WindowUDFImpl for VariadicWindowUDF { unimplemented!("unnecessary for testing"); } - fn is_causal(&self) -> bool { - false - } - fn limit_effect(&self, _args: &[Arc]) -> LimitEffect { LimitEffect::Unknown } @@ -862,10 +854,6 @@ impl WindowUDFImpl for MetadataBasedWindowUdf { .into()) } - fn is_causal(&self) -> bool { - false - } - fn limit_effect(&self, _args: &[Arc]) -> LimitEffect { LimitEffect::Unknown } diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs index e095f8d4b19a..4666411dd540 100644 --- a/datafusion/expr/src/expr_fn.rs +++ b/datafusion/expr/src/expr_fn.rs @@ -707,10 +707,6 @@ impl WindowUDFImpl for SimpleWindowUDF { ))) } - fn is_causal(&self) -> bool { - false - } - fn limit_effect(&self, _args: &[Arc]) -> LimitEffect { LimitEffect::Unknown } diff --git a/datafusion/expr/src/udwf.rs b/datafusion/expr/src/udwf.rs index f6d6c61babc9..f2467a76fa3f 100644 --- a/datafusion/expr/src/udwf.rs +++ b/datafusion/expr/src/udwf.rs @@ -244,11 +244,13 @@ where /// # use std::sync::LazyLock; /// # use arrow::datatypes::{DataType, Field, FieldRef}; /// # use datafusion_common::{DataFusionError, plan_err, Result}; -/// # use datafusion_expr::{col, Signature, Volatility, PartitionEvaluator, WindowFrame, ExprFunctionExt, Documentation}; +/// # use datafusion_expr::{col, Signature, Volatility, PartitionEvaluator, WindowFrame, ExprFunctionExt, Documentation, LimitEffect}; /// # use datafusion_expr::{WindowUDFImpl, WindowUDF}; /// # use datafusion_functions_window_common::field::WindowUDFFieldArgs; /// # use datafusion_functions_window_common::partition::PartitionEvaluatorArgs; /// # use datafusion_expr::window_doc_sections::DOC_SECTION_ANALYTICAL; +/// # use datafusion_physical_expr_common::physical_expr; +/// # use std::sync::Arc; /// /// #[derive(Debug, Clone, PartialEq, Eq, Hash)] /// struct SmoothIt { @@ -295,6 +297,9 @@ where /// fn documentation(&self) -> Option<&Documentation> { /// Some(get_doc()) /// } +/// fn limit_effect(&self, _args: &[Arc]) -> LimitEffect { +/// LimitEffect::Unknown +/// } /// } /// /// // Create a new WindowUDF from the implementation @@ -415,9 +420,6 @@ pub trait WindowUDFImpl: Debug + DynEq + DynHash + Send + Sync { None } - /// Returns true if this function only needs access to current and previous rows - fn is_causal(&self) -> bool; - /// If not causal, returns the effect this function will have on the window fn limit_effect(&self, args: &[Arc]) -> LimitEffect; } @@ -542,10 +544,6 @@ impl WindowUDFImpl for AliasedWindowUDFImpl { self.inner.documentation() } - fn is_causal(&self) -> bool { - self.inner.is_causal() - } - fn limit_effect(&self, args: &[Arc]) -> LimitEffect { self.inner.limit_effect(args) } @@ -603,10 +601,6 @@ mod test { unimplemented!() } - fn is_causal(&self) -> bool { - false - } - fn limit_effect(&self, _args: &[Arc]) -> LimitEffect { LimitEffect::Unknown } @@ -650,10 +644,6 @@ mod test { unimplemented!() } - fn is_causal(&self) -> bool { - false - } - fn limit_effect(&self, _args: &[Arc]) -> LimitEffect { LimitEffect::Unknown } diff --git a/datafusion/ffi/src/udwf/mod.rs b/datafusion/ffi/src/udwf/mod.rs index 97015baff196..9f56e2d4788b 100644 --- a/datafusion/ffi/src/udwf/mod.rs +++ b/datafusion/ffi/src/udwf/mod.rs @@ -352,10 +352,6 @@ impl WindowUDFImpl for ForeignWindowUDF { options.map(|s| s.into()) } - fn is_causal(&self) -> bool { - false - } - fn limit_effect(&self, _args: &[Arc]) -> LimitEffect { LimitEffect::Unknown } diff --git a/datafusion/functions-window/src/cume_dist.rs b/datafusion/functions-window/src/cume_dist.rs index 377c2194a884..372086b12d5e 100644 --- a/datafusion/functions-window/src/cume_dist.rs +++ b/datafusion/functions-window/src/cume_dist.rs @@ -112,10 +112,6 @@ impl WindowUDFImpl for CumeDist { self.doc() } - fn is_causal(&self) -> bool { - false - } - fn limit_effect(&self, _args: &[Arc]) -> LimitEffect { LimitEffect::Unknown } diff --git a/datafusion/functions-window/src/lead_lag.rs b/datafusion/functions-window/src/lead_lag.rs index 832c8d0cec4f..3910a0be574d 100644 --- a/datafusion/functions-window/src/lead_lag.rs +++ b/datafusion/functions-window/src/lead_lag.rs @@ -305,13 +305,6 @@ impl WindowUDFImpl for WindowShift { } } - fn is_causal(&self) -> bool { - match self.kind { - WindowShiftKind::Lag => true, - WindowShiftKind::Lead => false, - } - } - fn limit_effect(&self, args: &[Arc]) -> LimitEffect { if self.kind == WindowShiftKind::Lag { return LimitEffect::None; diff --git a/datafusion/functions-window/src/nth_value.rs b/datafusion/functions-window/src/nth_value.rs index 47ebc44f3925..82a9f8a737d6 100644 --- a/datafusion/functions-window/src/nth_value.rs +++ b/datafusion/functions-window/src/nth_value.rs @@ -344,10 +344,6 @@ impl WindowUDFImpl for NthValue { } } - fn is_causal(&self) -> bool { - false - } - fn limit_effect(&self, args: &[Arc]) -> LimitEffect { match args { [_, expr, ..] => { diff --git a/datafusion/functions-window/src/ntile.rs b/datafusion/functions-window/src/ntile.rs index add913faaa3d..d188db3bbf59 100644 --- a/datafusion/functions-window/src/ntile.rs +++ b/datafusion/functions-window/src/ntile.rs @@ -159,10 +159,6 @@ impl WindowUDFImpl for Ntile { self.doc() } - fn is_causal(&self) -> bool { - false - } - fn limit_effect(&self, _args: &[Arc]) -> LimitEffect { LimitEffect::Unknown } diff --git a/datafusion/functions-window/src/rank.rs b/datafusion/functions-window/src/rank.rs index 6d0fcd0a0cdc..6d891e76671d 100644 --- a/datafusion/functions-window/src/rank.rs +++ b/datafusion/functions-window/src/rank.rs @@ -242,14 +242,6 @@ impl WindowUDFImpl for Rank { } } - fn is_causal(&self) -> bool { - match self.rank_type { - RankType::Basic => true, - RankType::Dense => true, - RankType::Percent => false, - } - } - fn limit_effect(&self, _args: &[Arc]) -> LimitEffect { match self.rank_type { RankType::Basic => LimitEffect::None, diff --git a/datafusion/functions-window/src/row_number.rs b/datafusion/functions-window/src/row_number.rs index 8a6306b87a78..d7d298cecead 100644 --- a/datafusion/functions-window/src/row_number.rs +++ b/datafusion/functions-window/src/row_number.rs @@ -124,10 +124,6 @@ impl WindowUDFImpl for RowNumber { self.doc() } - fn is_causal(&self) -> bool { - true - } - fn limit_effect(&self, _args: &[Arc]) -> LimitEffect { LimitEffect::None } diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index cf849464fff3..c40906239073 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -4867,10 +4867,6 @@ mod tests { unimplemented!("not needed for tests") } - fn is_causal(&self) -> bool { - false - } - fn limit_effect(&self, _args: &[Arc]) -> LimitEffect { LimitEffect::Unknown } diff --git a/datafusion/physical-optimizer/src/limit_pushdown_past_window.rs b/datafusion/physical-optimizer/src/limit_pushdown_past_window.rs index d8af61ff9e49..4d13c83cfdc2 100644 --- a/datafusion/physical-optimizer/src/limit_pushdown_past_window.rs +++ b/datafusion/physical-optimizer/src/limit_pushdown_past_window.rs @@ -214,11 +214,7 @@ fn grow_limit(expr: &Arc) -> LimitEffect { return LimitEffect::Unknown; // should be only remaining type }; - // Return it's effect - let fun = udf.fun().inner(); - if fun.is_causal() { - return LimitEffect::None; - } + // Return its effect udf.limit_effect() } diff --git a/datafusion/proto/tests/cases/mod.rs b/datafusion/proto/tests/cases/mod.rs index 98740917eb6a..aec6c1de3030 100644 --- a/datafusion/proto/tests/cases/mod.rs +++ b/datafusion/proto/tests/cases/mod.rs @@ -176,10 +176,6 @@ impl WindowUDFImpl for CustomUDWF { Ok(Field::new(field_args.name(), DataType::UInt64, false).into()) } - fn is_causal(&self) -> bool { - false - } - fn limit_effect(&self, _args: &[Arc]) -> LimitEffect { LimitEffect::Unknown } diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs index e73d1250048d..c5d4b49092d9 100644 --- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs @@ -2546,10 +2546,6 @@ fn roundtrip_window() { } } - fn is_causal(&self) -> bool { - false - } - fn limit_effect(&self, _args: &[Arc]) -> LimitEffect { LimitEffect::Unknown } diff --git a/datafusion/sqllogictest/test_files/window.slt b/datafusion/sqllogictest/test_files/window.slt index fb9ad1780f6d..f1a708d84dd3 100644 --- a/datafusion/sqllogictest/test_files/window.slt +++ b/datafusion/sqllogictest/test_files/window.slt @@ -5463,7 +5463,7 @@ order by c1, c2, rank; query TT explain select c1, c2, rank1, rank2 from ( - select c1, c2, rank() over (partition by c1 order by c2) as rank1, + select c1, c2, rank() over (partition by c1 order by c2) as rank1, rank() over (partition by c2, c1 order by c1) as rank2 from t1 ) @@ -5497,7 +5497,7 @@ physical_plan query IIII select c1, c2, rank1, rank2 from ( - select c1, c2, rank() over (partition by c1 order by c2) as rank1, + select c1, c2, rank() over (partition by c1 order by c2) as rank1, rank() over (partition by c2, c1 order by c1) as rank2 from t1 ) @@ -5514,7 +5514,7 @@ order by c1, c2, rank1, rank2; query TT explain select c1, c2, rank1, rank2 from ( - select c1, c2, rank() over (partition by c1 order by c2) as rank1, + select c1, c2, rank() over (partition by c1 order by c2) as rank1, rank() over (partition by c2, c1 order by c1) as rank2 from t1 ) @@ -5548,7 +5548,7 @@ physical_plan query IIII select c1, c2, rank1, rank2 from ( - select c1, c2, rank() over (partition by c1 order by c2) as rank1, + select c1, c2, rank() over (partition by c1 order by c2) as rank1, rank() over (partition by c2, c1 order by c1) as rank2 from t1 ) From c19fb175766105f8a30dad34965111f13d0792ea Mon Sep 17 00:00:00 2001 From: Brent Gardner Date: Mon, 13 Oct 2025 11:01:57 -0600 Subject: [PATCH 15/30] two phased approach --- datafusion/expr/src/udwf.rs | 4 +- .../src/limit_pushdown_past_window.rs | 81 +++++++++++-------- 2 files changed, 49 insertions(+), 36 deletions(-) diff --git a/datafusion/expr/src/udwf.rs b/datafusion/expr/src/udwf.rs index f2467a76fa3f..448b84d758b7 100644 --- a/datafusion/expr/src/udwf.rs +++ b/datafusion/expr/src/udwf.rs @@ -421,7 +421,9 @@ pub trait WindowUDFImpl: Debug + DynEq + DynHash + Send + Sync { } /// If not causal, returns the effect this function will have on the window - fn limit_effect(&self, args: &[Arc]) -> LimitEffect; + fn limit_effect(&self, _args: &[Arc]) -> LimitEffect { + LimitEffect::Unknown + } } /// the effect this function will have on the limit pushdown diff --git a/datafusion/physical-optimizer/src/limit_pushdown_past_window.rs b/datafusion/physical-optimizer/src/limit_pushdown_past_window.rs index 4d13c83cfdc2..2e9da6697ff3 100644 --- a/datafusion/physical-optimizer/src/limit_pushdown_past_window.rs +++ b/datafusion/physical-optimizer/src/limit_pushdown_past_window.rs @@ -59,6 +59,7 @@ impl PhysicalOptimizerRule for LimitPushPastWindows { } let mut latest_limit: Option = None; let mut lookahead = 0; + let mut applying = false; let result = original.transform_down(|node| { // helper closure to DRY out most the early return cases let mut reset = |node, @@ -77,19 +78,24 @@ impl PhysicalOptimizerRule for LimitPushPastWindows { } // grab the latest limit we see - if let Some(limit) = node.as_any().downcast_ref::() { - latest_limit = limit.fetch().map(|fetch| fetch + limit.skip()); - lookahead = 0; - return Ok(Transformed::no(node)); - } - if let Some(limit) = node.as_any().downcast_ref::() { - latest_limit = limit.fetch(); - lookahead = 0; - return Ok(Transformed::no(node)); + if !applying { + if let Some(limit) = node.as_any().downcast_ref::() { + latest_limit = limit.fetch().map(|fetch| fetch + limit.skip()); + lookahead = 0; + return Ok(Transformed::no(node)); + } + if let Some(limit) = + node.as_any().downcast_ref::() + { + latest_limit = limit.fetch(); + lookahead = 0; + return Ok(Transformed::no(node)); + } } // grow the limit if we hit a window function if let Some(window) = node.as_any().downcast_ref::() { + applying = true; let mut max_rel = 0; let mut max_abs = 0; for expr in window.window_expr().iter() { @@ -119,35 +125,40 @@ impl PhysicalOptimizerRule for LimitPushPastWindows { } // Apply the limit if we hit a sortpreservingmerge node - if let Some(spm) = node.as_any().downcast_ref::() { - let latest = latest_limit.take(); - let Some(fetch) = latest else { + if applying { + if let Some(spm) = node.as_any().downcast_ref::() + { + let latest = latest_limit.take(); + let Some(fetch) = latest else { + lookahead = 0; + return Ok(Transformed::no(node)); + }; + let fetch = match spm.fetch() { + None => fetch + lookahead, + Some(existing) => cmp::min(existing, fetch + lookahead), + }; + let spm: Arc = + spm.with_fetch(Some(fetch)).unwrap(); lookahead = 0; - return Ok(Transformed::no(node)); - }; - let fetch = match spm.fetch() { - None => fetch + lookahead, - Some(existing) => cmp::min(existing, fetch + lookahead), - }; - let spm: Arc = spm.with_fetch(Some(fetch)).unwrap(); - lookahead = 0; - return Ok(Transformed::complete(spm)); - } + return Ok(Transformed::complete(spm)); + } - // Apply the limit if we hit a sort node - if let Some(sort) = node.as_any().downcast_ref::() { - let latest = latest_limit.take(); - let Some(fetch) = latest else { + // Apply the limit if we hit a sort node + if let Some(sort) = node.as_any().downcast_ref::() { + let latest = latest_limit.take(); + let Some(fetch) = latest else { + lookahead = 0; + return Ok(Transformed::no(node)); + }; + let fetch = match sort.fetch() { + None => fetch + lookahead, + Some(existing) => cmp::min(existing, fetch + lookahead), + }; + let sort: Arc = + Arc::new(sort.with_fetch(Some(fetch))); lookahead = 0; - return Ok(Transformed::no(node)); - }; - let fetch = match sort.fetch() { - None => fetch + lookahead, - Some(existing) => cmp::min(existing, fetch + lookahead), - }; - let sort: Arc = Arc::new(sort.with_fetch(Some(fetch))); - lookahead = 0; - return Ok(Transformed::complete(sort)); + return Ok(Transformed::complete(sort)); + } } // nodes along the way From 8f9ee3bcb99b9e6bb51d8941503209401cf6c97f Mon Sep 17 00:00:00 2001 From: Brent Gardner Date: Mon, 13 Oct 2025 11:24:42 -0600 Subject: [PATCH 16/30] refactor into context --- .../src/limit_pushdown_past_window.rs | 81 +++++++++++-------- 1 file changed, 48 insertions(+), 33 deletions(-) diff --git a/datafusion/physical-optimizer/src/limit_pushdown_past_window.rs b/datafusion/physical-optimizer/src/limit_pushdown_past_window.rs index 2e9da6697ff3..a05a1eda659c 100644 --- a/datafusion/physical-optimizer/src/limit_pushdown_past_window.rs +++ b/datafusion/physical-optimizer/src/limit_pushdown_past_window.rs @@ -48,6 +48,29 @@ impl LimitPushPastWindows { } } +enum Phase { + Find, + Grow, + Apply +} + +#[derive(Default)] +struct RuleContext { + pub limit: Option, + pub lookahead: usize, +} + +impl RuleContext { + pub fn reset_limit(&mut self, limit: Option) { + self.limit = limit; + self.lookahead = 0; + } + + pub fn max_lookahead(&mut self, new_val: usize) { + self.lookahead = self.lookahead.max(new_val); + } +} + impl PhysicalOptimizerRule for LimitPushPastWindows { fn optimize( &self, @@ -57,38 +80,34 @@ impl PhysicalOptimizerRule for LimitPushPastWindows { if !config.optimizer.enable_window_limits { return Ok(original); } - let mut latest_limit: Option = None; - let mut lookahead = 0; + let mut ctx = RuleContext::default(); let mut applying = false; let result = original.transform_down(|node| { // helper closure to DRY out most the early return cases - let mut reset = |node, - max: &mut usize| + let mut reset = |node, ctx: &mut RuleContext| -> datafusion_common::Result< Transformed>, > { - latest_limit = None; - *max = 0; + ctx.limit = None; + ctx.lookahead = 0; Ok(Transformed::no(node)) }; // traversing sides of joins will require more thought if node.children().len() > 1 { - return reset(node, &mut lookahead); + return reset(node, &mut ctx); } // grab the latest limit we see if !applying { if let Some(limit) = node.as_any().downcast_ref::() { - latest_limit = limit.fetch().map(|fetch| fetch + limit.skip()); - lookahead = 0; + ctx.reset_limit(limit.fetch().map(|fetch| fetch + limit.skip())); return Ok(Transformed::no(node)); } if let Some(limit) = node.as_any().downcast_ref::() { - latest_limit = limit.fetch(); - lookahead = 0; + ctx.reset_limit(limit.fetch()); return Ok(Transformed::no(node)); } } @@ -102,7 +121,7 @@ impl PhysicalOptimizerRule for LimitPushPastWindows { // grow based on function requirements match grow_limit(expr) { LimitEffect::None => {} - LimitEffect::Unknown => return reset(node, &mut lookahead), + LimitEffect::Unknown => return reset(node, &mut ctx), LimitEffect::Relative(rel) => max_rel = max_rel.max(rel), LimitEffect::Absolute(abs) => max_abs = max_abs.max(abs), } @@ -110,17 +129,17 @@ impl PhysicalOptimizerRule for LimitPushPastWindows { // grow based on frames let frame = expr.get_window_frame(); if frame.units != WindowFrameUnits::Rows { - return reset(node, &mut lookahead); // expression-based limits? + return reset(node, &mut ctx); // expression-based limits? } let Some(end_bound) = bound_to_usize(&frame.end_bound) else { - return reset(node, &mut lookahead); + return reset(node, &mut ctx); }; - lookahead = max(end_bound, lookahead); + ctx.lookahead = ctx.lookahead.max(end_bound); } // finish grow - lookahead = lookahead.max(lookahead + max_rel); - lookahead = lookahead.max(max_abs); + ctx.max_lookahead(ctx.lookahead + max_rel); + ctx.max_lookahead(max_abs); return Ok(Transformed::no(node)); } @@ -128,35 +147,31 @@ impl PhysicalOptimizerRule for LimitPushPastWindows { if applying { if let Some(spm) = node.as_any().downcast_ref::() { - let latest = latest_limit.take(); + let latest = ctx.limit.take(); let Some(fetch) = latest else { - lookahead = 0; - return Ok(Transformed::no(node)); + return reset(node, &mut ctx); }; let fetch = match spm.fetch() { - None => fetch + lookahead, - Some(existing) => cmp::min(existing, fetch + lookahead), + None => fetch + ctx.lookahead, + Some(existing) => cmp::min(existing, fetch + ctx.lookahead), }; let spm: Arc = spm.with_fetch(Some(fetch)).unwrap(); - lookahead = 0; return Ok(Transformed::complete(spm)); } // Apply the limit if we hit a sort node if let Some(sort) = node.as_any().downcast_ref::() { - let latest = latest_limit.take(); + let latest = ctx.limit.take(); let Some(fetch) = latest else { - lookahead = 0; - return Ok(Transformed::no(node)); + return reset(node, &mut ctx); }; let fetch = match sort.fetch() { - None => fetch + lookahead, - Some(existing) => cmp::min(existing, fetch + lookahead), + None => fetch + ctx.lookahead, + Some(existing) => cmp::min(existing, fetch + ctx.lookahead), }; let sort: Arc = Arc::new(sort.with_fetch(Some(fetch))); - lookahead = 0; return Ok(Transformed::complete(sort)); } } @@ -166,18 +181,18 @@ impl PhysicalOptimizerRule for LimitPushPastWindows { // TODO: avoid combinatorial explosion of rules*nodes // instead of white listing rules (supports push_down rule) // use properties like CardinalityEffect, or LimitEffect - return reset(node, &mut lookahead); + return reset(node, &mut ctx); } if let Some(part) = node.as_any().downcast_ref::() { let output = part.partitioning().partition_count(); let input = part.input().output_partitioning().partition_count(); if output < input { - return reset(node, &mut lookahead); + return reset(node, &mut ctx); } } match node.cardinality_effect() { - CardinalityEffect::Unknown => return reset(node, &mut lookahead), - CardinalityEffect::LowerEqual => return reset(node, &mut lookahead), + CardinalityEffect::Unknown => return reset(node, &mut ctx), + CardinalityEffect::LowerEqual => return reset(node, &mut ctx), CardinalityEffect::Equal => {} CardinalityEffect::GreaterEqual => {} } From 81ba4de376ce0f6252b09f11db03b3e298837112 Mon Sep 17 00:00:00 2001 From: Brent Gardner Date: Mon, 13 Oct 2025 11:36:12 -0600 Subject: [PATCH 17/30] refactor --- .../src/limit_pushdown_past_window.rs | 44 +++++++++++-------- 1 file changed, 26 insertions(+), 18 deletions(-) diff --git a/datafusion/physical-optimizer/src/limit_pushdown_past_window.rs b/datafusion/physical-optimizer/src/limit_pushdown_past_window.rs index a05a1eda659c..b593c9e5280e 100644 --- a/datafusion/physical-optimizer/src/limit_pushdown_past_window.rs +++ b/datafusion/physical-optimizer/src/limit_pushdown_past_window.rs @@ -48,19 +48,19 @@ impl LimitPushPastWindows { } } +#[derive(Eq, PartialEq)] enum Phase { - Find, - Grow, + FindOrGrow, Apply } #[derive(Default)] -struct RuleContext { +struct TraverseState { pub limit: Option, pub lookahead: usize, } -impl RuleContext { +impl TraverseState { pub fn reset_limit(&mut self, limit: Option) { self.limit = limit; self.lookahead = 0; @@ -71,6 +71,21 @@ impl RuleContext { } } +fn get_limit(node: &Arc, ctx: &mut TraverseState) -> bool { + if let Some(limit) = node.as_any().downcast_ref::() { + ctx.reset_limit(limit.fetch().map(|fetch| fetch + limit.skip())); + return true; + } + if let Some(limit) = + node.as_any().downcast_ref::() + { + ctx.reset_limit(limit.fetch()); + return true; + } + + false +} + impl PhysicalOptimizerRule for LimitPushPastWindows { fn optimize( &self, @@ -80,11 +95,11 @@ impl PhysicalOptimizerRule for LimitPushPastWindows { if !config.optimizer.enable_window_limits { return Ok(original); } - let mut ctx = RuleContext::default(); - let mut applying = false; + let mut ctx = TraverseState::default(); + let mut phase = Phase::FindOrGrow; let result = original.transform_down(|node| { // helper closure to DRY out most the early return cases - let mut reset = |node, ctx: &mut RuleContext| + let mut reset = |node, ctx: &mut TraverseState| -> datafusion_common::Result< Transformed>, > { @@ -99,22 +114,15 @@ impl PhysicalOptimizerRule for LimitPushPastWindows { } // grab the latest limit we see - if !applying { - if let Some(limit) = node.as_any().downcast_ref::() { - ctx.reset_limit(limit.fetch().map(|fetch| fetch + limit.skip())); - return Ok(Transformed::no(node)); - } - if let Some(limit) = - node.as_any().downcast_ref::() - { - ctx.reset_limit(limit.fetch()); + if phase == Phase::FindOrGrow { + if get_limit(&node, &mut ctx) { return Ok(Transformed::no(node)); } } // grow the limit if we hit a window function if let Some(window) = node.as_any().downcast_ref::() { - applying = true; + phase = Phase::Apply; let mut max_rel = 0; let mut max_abs = 0; for expr in window.window_expr().iter() { @@ -144,7 +152,7 @@ impl PhysicalOptimizerRule for LimitPushPastWindows { } // Apply the limit if we hit a sortpreservingmerge node - if applying { + if phase == Phase::Apply { if let Some(spm) = node.as_any().downcast_ref::() { let latest = ctx.limit.take(); From 35a0033905636853103d487f453687322eadfab0 Mon Sep 17 00:00:00 2001 From: Brent Gardner Date: Mon, 13 Oct 2025 11:58:52 -0600 Subject: [PATCH 18/30] refactor --- .../src/limit_pushdown_past_window.rs | 93 ++++++++----------- 1 file changed, 40 insertions(+), 53 deletions(-) diff --git a/datafusion/physical-optimizer/src/limit_pushdown_past_window.rs b/datafusion/physical-optimizer/src/limit_pushdown_past_window.rs index b593c9e5280e..f94508d02fda 100644 --- a/datafusion/physical-optimizer/src/limit_pushdown_past_window.rs +++ b/datafusion/physical-optimizer/src/limit_pushdown_past_window.rs @@ -32,7 +32,6 @@ use datafusion_physical_plan::sorts::sort_preserving_merge::SortPreservingMergeE use datafusion_physical_plan::windows::{BoundedWindowAggExec, WindowUDFExpr}; use datafusion_physical_plan::{ExecutionPlan, ExecutionPlanProperties}; use std::cmp; -use std::cmp::max; use std::sync::Arc; /// This rule inspects [`ExecutionPlan`]'s attempting to find fetch limits that were not pushed @@ -51,7 +50,7 @@ impl LimitPushPastWindows { #[derive(Eq, PartialEq)] enum Phase { FindOrGrow, - Apply + Apply, } #[derive(Default)] @@ -71,21 +70,6 @@ impl TraverseState { } } -fn get_limit(node: &Arc, ctx: &mut TraverseState) -> bool { - if let Some(limit) = node.as_any().downcast_ref::() { - ctx.reset_limit(limit.fetch().map(|fetch| fetch + limit.skip())); - return true; - } - if let Some(limit) = - node.as_any().downcast_ref::() - { - ctx.reset_limit(limit.fetch()); - return true; - } - - false -} - impl PhysicalOptimizerRule for LimitPushPastWindows { fn optimize( &self, @@ -99,7 +83,8 @@ impl PhysicalOptimizerRule for LimitPushPastWindows { let mut phase = Phase::FindOrGrow; let result = original.transform_down(|node| { // helper closure to DRY out most the early return cases - let mut reset = |node, ctx: &mut TraverseState| + let reset = |node, + ctx: &mut TraverseState| -> datafusion_common::Result< Transformed>, > { @@ -114,10 +99,8 @@ impl PhysicalOptimizerRule for LimitPushPastWindows { } // grab the latest limit we see - if phase == Phase::FindOrGrow { - if get_limit(&node, &mut ctx) { - return Ok(Transformed::no(node)); - } + if phase == Phase::FindOrGrow && get_limit(&node, &mut ctx) { + return Ok(Transformed::no(node)); } // grow the limit if we hit a window function @@ -153,34 +136,8 @@ impl PhysicalOptimizerRule for LimitPushPastWindows { // Apply the limit if we hit a sortpreservingmerge node if phase == Phase::Apply { - if let Some(spm) = node.as_any().downcast_ref::() - { - let latest = ctx.limit.take(); - let Some(fetch) = latest else { - return reset(node, &mut ctx); - }; - let fetch = match spm.fetch() { - None => fetch + ctx.lookahead, - Some(existing) => cmp::min(existing, fetch + ctx.lookahead), - }; - let spm: Arc = - spm.with_fetch(Some(fetch)).unwrap(); - return Ok(Transformed::complete(spm)); - } - - // Apply the limit if we hit a sort node - if let Some(sort) = node.as_any().downcast_ref::() { - let latest = ctx.limit.take(); - let Some(fetch) = latest else { - return reset(node, &mut ctx); - }; - let fetch = match sort.fetch() { - None => fetch + ctx.lookahead, - Some(existing) => cmp::min(existing, fetch + ctx.lookahead), - }; - let sort: Arc = - Arc::new(sort.with_fetch(Some(fetch))); - return Ok(Transformed::complete(sort)); + if let Some(out) = apply_limit(&node, &mut ctx) { + return Ok(out); } } @@ -219,6 +176,38 @@ impl PhysicalOptimizerRule for LimitPushPastWindows { } } +fn apply_limit( + node: &Arc, + ctx: &mut TraverseState, +) -> Option>> { + if !node.as_any().is::() && !node.as_any().is::() { + return None; + } + let latest = ctx.limit.take(); + let Some(fetch) = latest else { + ctx.limit = None; + ctx.lookahead = 0; + return Some(Transformed::no(node.clone())); + }; + let fetch = match node.fetch() { + None => fetch + ctx.lookahead, + Some(existing) => cmp::min(existing, fetch + ctx.lookahead), + }; + Some(Transformed::complete(node.with_fetch(Some(fetch)).unwrap())) +} + +fn get_limit(node: &Arc, ctx: &mut TraverseState) -> bool { + if let Some(limit) = node.as_any().downcast_ref::() { + ctx.reset_limit(limit.fetch().map(|fetch| fetch + limit.skip())); + return true; + } + if let Some(limit) = node.as_any().downcast_ref::() { + ctx.reset_limit(limit.fetch()); + return true; + } + false +} + /// Examines the `WindowExpr` and decides: /// 1. The expression does not change the window size /// 2. The expression grows it by X amount @@ -230,7 +219,7 @@ impl PhysicalOptimizerRule for LimitPushPastWindows { /// /// # Returns /// -/// `false` if we can't optimize +/// The effect on the limit fn grow_limit(expr: &Arc) -> LimitEffect { // White list aggregates if expr.as_any().is::() @@ -247,8 +236,6 @@ fn grow_limit(expr: &Arc) -> LimitEffect { let Some(udf) = swfe.as_any().downcast_ref::() else { return LimitEffect::Unknown; // should be only remaining type }; - - // Return its effect udf.limit_effect() } From 3e3d026d2c9946f2f6967aeb9dd40b2091bb0a37 Mon Sep 17 00:00:00 2001 From: Brent Gardner Date: Mon, 13 Oct 2025 12:06:44 -0600 Subject: [PATCH 19/30] refactor --- .../src/limit_pushdown_past_window.rs | 57 +++++++++++-------- 1 file changed, 32 insertions(+), 25 deletions(-) diff --git a/datafusion/physical-optimizer/src/limit_pushdown_past_window.rs b/datafusion/physical-optimizer/src/limit_pushdown_past_window.rs index f94508d02fda..930bea78ced8 100644 --- a/datafusion/physical-optimizer/src/limit_pushdown_past_window.rs +++ b/datafusion/physical-optimizer/src/limit_pushdown_past_window.rs @@ -106,31 +106,9 @@ impl PhysicalOptimizerRule for LimitPushPastWindows { // grow the limit if we hit a window function if let Some(window) = node.as_any().downcast_ref::() { phase = Phase::Apply; - let mut max_rel = 0; - let mut max_abs = 0; - for expr in window.window_expr().iter() { - // grow based on function requirements - match grow_limit(expr) { - LimitEffect::None => {} - LimitEffect::Unknown => return reset(node, &mut ctx), - LimitEffect::Relative(rel) => max_rel = max_rel.max(rel), - LimitEffect::Absolute(abs) => max_abs = max_abs.max(abs), - } - - // grow based on frames - let frame = expr.get_window_frame(); - if frame.units != WindowFrameUnits::Rows { - return reset(node, &mut ctx); // expression-based limits? - } - let Some(end_bound) = bound_to_usize(&frame.end_bound) else { - return reset(node, &mut ctx); - }; - ctx.lookahead = ctx.lookahead.max(end_bound); + if !grow_limit(window, &mut ctx) { + return reset(node, &mut ctx); } - - // finish grow - ctx.max_lookahead(ctx.lookahead + max_rel); - ctx.max_lookahead(max_abs); return Ok(Transformed::no(node)); } @@ -176,6 +154,35 @@ impl PhysicalOptimizerRule for LimitPushPastWindows { } } +fn grow_limit(window: &BoundedWindowAggExec, ctx: &mut TraverseState) -> bool { + let mut max_rel = 0; + let mut max_abs = 0; + for expr in window.window_expr().iter() { + // grow based on function requirements + match get_limit_effect(expr) { + LimitEffect::None => {} + LimitEffect::Unknown => return false, + LimitEffect::Relative(rel) => max_rel = max_rel.max(rel), + LimitEffect::Absolute(abs) => max_abs = max_abs.max(abs), + } + + // grow based on frames + let frame = expr.get_window_frame(); + if frame.units != WindowFrameUnits::Rows { + return false; // expression-based limits not statically evaluatable + } + let Some(end_bound) = bound_to_usize(&frame.end_bound) else { + return false; + }; + ctx.lookahead = ctx.lookahead.max(end_bound); + } + + // finish grow + ctx.max_lookahead(ctx.lookahead + max_rel); + ctx.max_lookahead(max_abs); + true +} + fn apply_limit( node: &Arc, ctx: &mut TraverseState, @@ -220,7 +227,7 @@ fn get_limit(node: &Arc, ctx: &mut TraverseState) -> bool { /// # Returns /// /// The effect on the limit -fn grow_limit(expr: &Arc) -> LimitEffect { +fn get_limit_effect(expr: &Arc) -> LimitEffect { // White list aggregates if expr.as_any().is::() || expr.as_any().is::() From 92570a45e81582f2dffa5aab9a0a8460afeaa4bd Mon Sep 17 00:00:00 2001 From: Brent Gardner Date: Mon, 13 Oct 2025 12:07:47 -0600 Subject: [PATCH 20/30] remove comments --- .../physical-optimizer/src/limit_pushdown_past_window.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/datafusion/physical-optimizer/src/limit_pushdown_past_window.rs b/datafusion/physical-optimizer/src/limit_pushdown_past_window.rs index 930bea78ced8..82b3501e9eef 100644 --- a/datafusion/physical-optimizer/src/limit_pushdown_past_window.rs +++ b/datafusion/physical-optimizer/src/limit_pushdown_past_window.rs @@ -121,9 +121,6 @@ impl PhysicalOptimizerRule for LimitPushPastWindows { // nodes along the way if !node.supports_limit_pushdown() { - // TODO: avoid combinatorial explosion of rules*nodes - // instead of white listing rules (supports push_down rule) - // use properties like CardinalityEffect, or LimitEffect return reset(node, &mut ctx); } if let Some(part) = node.as_any().downcast_ref::() { From 4f761f5f329889e4b8ac28764b24afe9555f85e7 Mon Sep 17 00:00:00 2001 From: Brent Gardner Date: Mon, 13 Oct 2025 12:11:17 -0600 Subject: [PATCH 21/30] remove deps --- Cargo.lock | 1 - datafusion/physical-optimizer/Cargo.toml | 1 - 2 files changed, 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b1275bb7f947..05c70d83b8d6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2452,7 +2452,6 @@ dependencies = [ "datafusion-execution", "datafusion-expr", "datafusion-expr-common", - "datafusion-functions-window", "datafusion-physical-expr", "datafusion-physical-expr-common", "datafusion-physical-plan", diff --git a/datafusion/physical-optimizer/Cargo.toml b/datafusion/physical-optimizer/Cargo.toml index 0f765ad9b9f3..15466cd86bb0 100644 --- a/datafusion/physical-optimizer/Cargo.toml +++ b/datafusion/physical-optimizer/Cargo.toml @@ -43,7 +43,6 @@ datafusion-common = { workspace = true } datafusion-execution = { workspace = true } datafusion-expr = { workspace = true } datafusion-expr-common = { workspace = true, default-features = true } -datafusion-functions-window = { workspace = true } datafusion-physical-expr = { workspace = true } datafusion-physical-expr-common = { workspace = true } datafusion-physical-plan = { workspace = true } From a97f0d1bd483dd62973b876d0727f106835b2b22 Mon Sep 17 00:00:00 2001 From: Brent Gardner Date: Mon, 13 Oct 2025 12:11:17 -0600 Subject: [PATCH 22/30] Fix NthValue --- datafusion/functions-window/src/nth_value.rs | 17 +---- .../src/limit_pushdown_past_window.rs | 13 ++-- .../sqllogictest/test_files/window_limits.slt | 74 +++++++++++++++++++ 3 files changed, 83 insertions(+), 21 deletions(-) diff --git a/datafusion/functions-window/src/nth_value.rs b/datafusion/functions-window/src/nth_value.rs index 82a9f8a737d6..985d48474ca3 100644 --- a/datafusion/functions-window/src/nth_value.rs +++ b/datafusion/functions-window/src/nth_value.rs @@ -344,21 +344,8 @@ impl WindowUDFImpl for NthValue { } } - fn limit_effect(&self, args: &[Arc]) -> LimitEffect { - match args { - [_, expr, ..] => { - let Some(lit) = expr.as_any().downcast_ref::() - else { - return LimitEffect::Unknown; - }; - let ScalarValue::Int64(Some(amount)) = lit.value() else { - return LimitEffect::Unknown; // we should only get int64 from the parser - }; - let nth_row = (amount - 1).max(0) as usize; - LimitEffect::Absolute(nth_row) - } - _ => LimitEffect::Unknown, // invalid arguments - } + fn limit_effect(&self, _args: &[Arc]) -> LimitEffect { + LimitEffect::None // NthValue is causal } } diff --git a/datafusion/physical-optimizer/src/limit_pushdown_past_window.rs b/datafusion/physical-optimizer/src/limit_pushdown_past_window.rs index 82b3501e9eef..1c671cd07488 100644 --- a/datafusion/physical-optimizer/src/limit_pushdown_past_window.rs +++ b/datafusion/physical-optimizer/src/limit_pushdown_past_window.rs @@ -153,14 +153,16 @@ impl PhysicalOptimizerRule for LimitPushPastWindows { fn grow_limit(window: &BoundedWindowAggExec, ctx: &mut TraverseState) -> bool { let mut max_rel = 0; - let mut max_abs = 0; for expr in window.window_expr().iter() { // grow based on function requirements match get_limit_effect(expr) { LimitEffect::None => {} LimitEffect::Unknown => return false, LimitEffect::Relative(rel) => max_rel = max_rel.max(rel), - LimitEffect::Absolute(abs) => max_abs = max_abs.max(abs), + LimitEffect::Absolute(val) => { + let cur = ctx.limit.unwrap_or(0); + ctx.limit = Some(cur.max(val)) + } } // grow based on frames @@ -169,14 +171,13 @@ fn grow_limit(window: &BoundedWindowAggExec, ctx: &mut TraverseState) -> bool { return false; // expression-based limits not statically evaluatable } let Some(end_bound) = bound_to_usize(&frame.end_bound) else { - return false; + return false; // can't optimize unbounded window expressions }; - ctx.lookahead = ctx.lookahead.max(end_bound); + ctx.max_lookahead(end_bound); } // finish grow ctx.max_lookahead(ctx.lookahead + max_rel); - ctx.max_lookahead(max_abs); true } @@ -191,7 +192,7 @@ fn apply_limit( let Some(fetch) = latest else { ctx.limit = None; ctx.lookahead = 0; - return Some(Transformed::no(node.clone())); + return Some(Transformed::no(Arc::clone(node))); }; let fetch = match node.fetch() { None => fetch + ctx.lookahead, diff --git a/datafusion/sqllogictest/test_files/window_limits.slt b/datafusion/sqllogictest/test_files/window_limits.slt index 1c5d248f6ccd..01e04808f1cf 100644 --- a/datafusion/sqllogictest/test_files/window_limits.slt +++ b/datafusion/sqllogictest/test_files/window_limits.slt @@ -75,6 +75,80 @@ physical_plan 04)------SortExec: TopK(fetch=4), expr=[empno@0 ASC NULLS LAST], preserve_partitioning=[false] 05)--------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100_with_dates.csv]]}, projection=[empno], file_type=csv, has_header=true +# 2 < 3... nth_value should not grow the limit +statement ok +set datafusion.optimizer.enable_window_limits = false; + +query I +SELECT NTH_VALUE(empno, 2) OVER (ORDER BY empno ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM employees LIMIT 3 +---- +NULL +299 +299 + +statement ok +set datafusion.optimizer.enable_window_limits = true; + +query I +SELECT NTH_VALUE(empno, 2) OVER (ORDER BY empno ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM employees LIMIT 3 +---- +NULL +299 +299 + +query TT +EXPLAIN +SELECT NTH_VALUE(empno, 2) OVER (ORDER BY empno ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM employees LIMIT 3 +---- +logical_plan +01)Projection: nth_value(employees.empno,Int64(2)) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW +02)--Limit: skip=0, fetch=3 +03)----WindowAggr: windowExpr=[[nth_value(employees.empno, Int64(2)) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] +04)------TableScan: employees projection=[empno] +physical_plan +01)ProjectionExec: expr=[nth_value(employees.empno,Int64(2)) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@1 as nth_value(employees.empno,Int64(2)) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW] +02)--GlobalLimitExec: skip=0, fetch=3 +03)----BoundedWindowAggExec: wdw=[nth_value(employees.empno,Int64(2)) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "nth_value(employees.empno,Int64(2)) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] +04)------SortExec: TopK(fetch=3), expr=[empno@0 ASC NULLS LAST], preserve_partitioning=[false] +05)--------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100_with_dates.csv]]}, projection=[empno], file_type=csv, has_header=true + +# 5 > 3... nth_value still won't grow the limit - it's causal +statement ok +set datafusion.optimizer.enable_window_limits = false; + +query I +SELECT NTH_VALUE(empno, 5) OVER (ORDER BY empno ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM employees LIMIT 3 +---- +NULL +NULL +NULL + +statement ok +set datafusion.optimizer.enable_window_limits = true; + +query I +SELECT NTH_VALUE(empno, 5) OVER (ORDER BY empno ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM employees LIMIT 3 +---- +NULL +NULL +NULL + +query TT +EXPLAIN +SELECT NTH_VALUE(empno, 5) OVER (ORDER BY empno ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM employees LIMIT 3 +---- +logical_plan +01)Projection: nth_value(employees.empno,Int64(5)) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW +02)--Limit: skip=0, fetch=3 +03)----WindowAggr: windowExpr=[[nth_value(employees.empno, Int64(5)) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] +04)------TableScan: employees projection=[empno] +physical_plan +01)ProjectionExec: expr=[nth_value(employees.empno,Int64(5)) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@1 as nth_value(employees.empno,Int64(5)) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW] +02)--GlobalLimitExec: skip=0, fetch=3 +03)----BoundedWindowAggExec: wdw=[nth_value(employees.empno,Int64(5)) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "nth_value(employees.empno,Int64(5)) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] +04)------SortExec: TopK(fetch=3), expr=[empno@0 ASC NULLS LAST], preserve_partitioning=[false] +05)--------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100_with_dates.csv]]}, projection=[empno], file_type=csv, has_header=true + # should handle partition by unoptimized statement ok set datafusion.optimizer.enable_window_limits = false; From b5b72a3760107a0bb6289058e95cea09b492cac8 Mon Sep 17 00:00:00 2001 From: Brent Gardner Date: Mon, 13 Oct 2025 14:46:29 -0600 Subject: [PATCH 23/30] aggregates --- datafusion/functions-window/src/nth_value.rs | 1 - .../sqllogictest/test_files/window_limits.slt | 65 +++++++++++++++++++ 2 files changed, 65 insertions(+), 1 deletion(-) diff --git a/datafusion/functions-window/src/nth_value.rs b/datafusion/functions-window/src/nth_value.rs index 985d48474ca3..329d8aa5ab17 100644 --- a/datafusion/functions-window/src/nth_value.rs +++ b/datafusion/functions-window/src/nth_value.rs @@ -31,7 +31,6 @@ use datafusion_expr::{ }; use datafusion_functions_window_common::field; use datafusion_functions_window_common::partition::PartitionEvaluatorArgs; -use datafusion_physical_expr::expressions; use datafusion_physical_expr_common::physical_expr::PhysicalExpr; use field::WindowUDFFieldArgs; use std::any::Any; diff --git a/datafusion/sqllogictest/test_files/window_limits.slt b/datafusion/sqllogictest/test_files/window_limits.slt index 01e04808f1cf..2f9cde9c47ab 100644 --- a/datafusion/sqllogictest/test_files/window_limits.slt +++ b/datafusion/sqllogictest/test_files/window_limits.slt @@ -149,6 +149,71 @@ physical_plan 04)------SortExec: TopK(fetch=3), expr=[empno@0 ASC NULLS LAST], preserve_partitioning=[false] 05)--------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100_with_dates.csv]]}, projection=[empno], file_type=csv, has_header=true +# aggregate functions shouldn't effect the window +statement ok +set datafusion.optimizer.enable_window_limits = false; + +query TIIRII +SELECT + depname, + empno, + SUM(salary) OVER (ORDER BY empno ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS running_sum, + AVG(salary) OVER (ORDER BY empno ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS running_avg, + MIN(salary) OVER (ORDER BY empno ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS running_min, + MAX(salary) OVER (ORDER BY empno ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS running_max +FROM employees +LIMIT 5; +---- +a 102 3276123488 3276123488 3276123488 3276123488 +e 299 3304897863 1652448931.5 28774375 3276123488 +a 363 5170205535 1723401845 28774375 3276123488 +e 417 5727722654 1431930663.5 28774375 3276123488 +d 794 9789357761 1957871552.2 28774375 4061635107 + +statement ok +set datafusion.optimizer.enable_window_limits = true; + +query TIIRII +SELECT + depname, + empno, + SUM(salary) OVER (ORDER BY empno ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS running_sum, + AVG(salary) OVER (ORDER BY empno ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS running_avg, + MIN(salary) OVER (ORDER BY empno ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS running_min, + MAX(salary) OVER (ORDER BY empno ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS running_max +FROM employees +LIMIT 5; +---- +a 102 3276123488 3276123488 3276123488 3276123488 +e 299 3304897863 1652448931.5 28774375 3276123488 +a 363 5170205535 1723401845 28774375 3276123488 +e 417 5727722654 1431930663.5 28774375 3276123488 +d 794 9789357761 1957871552.2 28774375 4061635107 + +query TT +EXPLAIN +SELECT + depname, + empno, + SUM(salary) OVER (ORDER BY empno ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS running_sum, + AVG(salary) OVER (ORDER BY empno ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS running_avg, + MIN(salary) OVER (ORDER BY empno ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS running_min, + MAX(salary) OVER (ORDER BY empno ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS running_max +FROM employees +LIMIT 5; +---- +logical_plan +01)Projection: employees.depname, employees.empno, sum(employees.salary) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS running_sum, avg(employees.salary) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS running_avg, min(employees.salary) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS running_min, max(employees.salary) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS running_max +02)--Limit: skip=0, fetch=5 +03)----WindowAggr: windowExpr=[[sum(employees.salary) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, avg(CAST(employees.salary AS Float64)) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, min(employees.salary) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, max(employees.salary) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] +04)------TableScan: employees projection=[depname, empno, salary] +physical_plan +01)ProjectionExec: expr=[depname@0 as depname, empno@1 as empno, sum(employees.salary) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@3 as running_sum, avg(employees.salary) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@4 as running_avg, min(employees.salary) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@5 as running_min, max(employees.salary) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@6 as running_max] +02)--GlobalLimitExec: skip=0, fetch=5 +03)----BoundedWindowAggExec: wdw=[sum(employees.salary) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "sum(employees.salary) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, avg(employees.salary) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "avg(employees.salary) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, min(employees.salary) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "min(employees.salary) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, max(employees.salary) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "max(employees.salary) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] +04)------SortExec: TopK(fetch=5), expr=[empno@1 ASC NULLS LAST], preserve_partitioning=[false] +05)--------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100_with_dates.csv]]}, projection=[depname, empno, salary], file_type=csv, has_header=true + # should handle partition by unoptimized statement ok set datafusion.optimizer.enable_window_limits = false; From 1652bcdda943c5e35c57547ee99be96787be902a Mon Sep 17 00:00:00 2001 From: Brent Gardner Date: Mon, 13 Oct 2025 14:55:34 -0600 Subject: [PATCH 24/30] ranking functions --- .../sqllogictest/test_files/window_limits.slt | 126 +++++++++++++++++- 1 file changed, 125 insertions(+), 1 deletion(-) diff --git a/datafusion/sqllogictest/test_files/window_limits.slt b/datafusion/sqllogictest/test_files/window_limits.slt index 2f9cde9c47ab..e273dee2d2e2 100644 --- a/datafusion/sqllogictest/test_files/window_limits.slt +++ b/datafusion/sqllogictest/test_files/window_limits.slt @@ -149,7 +149,7 @@ physical_plan 04)------SortExec: TopK(fetch=3), expr=[empno@0 ASC NULLS LAST], preserve_partitioning=[false] 05)--------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100_with_dates.csv]]}, projection=[empno], file_type=csv, has_header=true -# aggregate functions shouldn't effect the window +# aggregate functions shouldn't affect the window statement ok set datafusion.optimizer.enable_window_limits = false; @@ -214,6 +214,130 @@ physical_plan 04)------SortExec: TopK(fetch=5), expr=[empno@1 ASC NULLS LAST], preserve_partitioning=[false] 05)--------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100_with_dates.csv]]}, projection=[depname, empno, salary], file_type=csv, has_header=true +# ranking functions that don't affect the limit +statement ok +set datafusion.optimizer.enable_window_limits = false; + +query IIII +SELECT + empno, + row_number() OVER (ORDER BY empno ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS rn, + rank() OVER (ORDER BY empno ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS rnk, + dense_rank() OVER (ORDER BY empno ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS drnk +FROM employees +ORDER BY empno +LIMIT 5; +---- +102 1 1 1 +299 2 2 2 +363 3 3 3 +417 4 4 4 +794 5 5 5 + +statement ok +set datafusion.optimizer.enable_window_limits = true; + +query IIII +SELECT + empno, + row_number() OVER (ORDER BY empno ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS rn, + rank() OVER (ORDER BY empno ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS rnk, + dense_rank() OVER (ORDER BY empno ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS drnk +FROM employees +ORDER BY empno +LIMIT 5; +---- +102 1 1 1 +299 2 2 2 +363 3 3 3 +417 4 4 4 +794 5 5 5 + +query TT +EXPLAIN +SELECT + empno, + row_number() OVER (ORDER BY empno ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS rn, + rank() OVER (ORDER BY empno ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS rnk, + dense_rank() OVER (ORDER BY empno ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS drnk +FROM employees +ORDER BY empno +LIMIT 5; +---- +logical_plan +01)Sort: employees.empno ASC NULLS LAST, fetch=5 +02)--Projection: employees.empno, row_number() ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS rn, rank() ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS rnk, dense_rank() ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS drnk +03)----WindowAggr: windowExpr=[[row_number() ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, rank() ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, dense_rank() ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] +04)------TableScan: employees projection=[empno] +physical_plan +01)ProjectionExec: expr=[empno@0 as empno, row_number() ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@1 as rn, rank() ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as rnk, dense_rank() ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@3 as drnk] +02)--GlobalLimitExec: skip=0, fetch=5 +03)----BoundedWindowAggExec: wdw=[row_number() ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "row_number() ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, rank() ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "rank() ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, dense_rank() ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "dense_rank() ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] +04)------SortExec: TopK(fetch=5), expr=[empno@0 ASC NULLS LAST], preserve_partitioning=[false] +05)--------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100_with_dates.csv]]}, projection=[empno], file_type=csv, has_header=true + +# Unoptimizable global ranking functions +statement ok +set datafusion.optimizer.enable_window_limits = false; + +query IRRI +SELECT + empno, + percent_rank() OVER (ORDER BY empno) AS pr, + cume_dist() OVER (ORDER BY empno) AS cd, + ntile(4) OVER (ORDER BY empno) AS nt +FROM employees +ORDER BY empno +LIMIT 5; +---- +102 0 0.01 1 +299 0.010101010101 0.02 1 +363 0.020202020202 0.03 1 +417 0.030303030303 0.04 1 +794 0.040404040404 0.05 1 + +statement ok +set datafusion.optimizer.enable_window_limits = true; + +query IRRI +SELECT + empno, + percent_rank() OVER (ORDER BY empno) AS pr, + cume_dist() OVER (ORDER BY empno) AS cd, + ntile(4) OVER (ORDER BY empno) AS nt +FROM employees +ORDER BY empno +LIMIT 5; +---- +102 0 0.01 1 +299 0.010101010101 0.02 1 +363 0.020202020202 0.03 1 +417 0.030303030303 0.04 1 +794 0.040404040404 0.05 1 + +query TT +EXPLAIN +SELECT + empno, + percent_rank() OVER (ORDER BY empno) AS pr, + cume_dist() OVER (ORDER BY empno) AS cd, + ntile(4) OVER (ORDER BY empno) AS nt +FROM employees +ORDER BY empno +LIMIT 5; +---- +logical_plan +01)Sort: employees.empno ASC NULLS LAST, fetch=5 +02)--Projection: employees.empno, percent_rank() ORDER BY [employees.empno ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS pr, cume_dist() ORDER BY [employees.empno ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS cd, ntile(Int64(4)) ORDER BY [employees.empno ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS nt +03)----WindowAggr: windowExpr=[[percent_rank() ORDER BY [employees.empno ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, cume_dist() ORDER BY [employees.empno ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, ntile(Int64(4)) ORDER BY [employees.empno ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] +04)------TableScan: employees projection=[empno] +physical_plan +01)ProjectionExec: expr=[empno@0 as empno, percent_rank() ORDER BY [employees.empno ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@1 as pr, cume_dist() ORDER BY [employees.empno ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as cd, ntile(Int64(4)) ORDER BY [employees.empno ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@3 as nt] +02)--GlobalLimitExec: skip=0, fetch=5 +03)----WindowAggExec: wdw=[percent_rank() ORDER BY [employees.empno ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "percent_rank() ORDER BY [employees.empno ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Float64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: CurrentRow, is_causal: false }, cume_dist() ORDER BY [employees.empno ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "cume_dist() ORDER BY [employees.empno ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Float64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: CurrentRow, is_causal: false }, ntile(Int64(4)) ORDER BY [employees.empno ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "ntile(Int64(4)) ORDER BY [employees.empno ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: CurrentRow, is_causal: false }] +04)------SortExec: expr=[empno@0 ASC NULLS LAST], preserve_partitioning=[false] +05)--------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100_with_dates.csv]]}, projection=[empno], file_type=csv, has_header=true + # should handle partition by unoptimized statement ok set datafusion.optimizer.enable_window_limits = false; From f066d9eec73c115ab5792d69a2297fa034d63ec9 Mon Sep 17 00:00:00 2001 From: Brent Gardner Date: Mon, 13 Oct 2025 15:07:02 -0600 Subject: [PATCH 25/30] More tests --- .../sqllogictest/test_files/window_limits.slt | 128 ++++++++++++++++-- 1 file changed, 115 insertions(+), 13 deletions(-) diff --git a/datafusion/sqllogictest/test_files/window_limits.slt b/datafusion/sqllogictest/test_files/window_limits.slt index e273dee2d2e2..37811c5e3a26 100644 --- a/datafusion/sqllogictest/test_files/window_limits.slt +++ b/datafusion/sqllogictest/test_files/window_limits.slt @@ -75,6 +75,43 @@ physical_plan 04)------SortExec: TopK(fetch=4), expr=[empno@0 ASC NULLS LAST], preserve_partitioning=[false] 05)--------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100_with_dates.csv]]}, projection=[empno], file_type=csv, has_header=true +# lead defaults can lookahead by any amount and should grow limit +statement ok +set datafusion.optimizer.enable_window_limits = false; + +query I +SELECT LEAD(empno, 2) OVER (ORDER BY empno ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM employees LIMIT 3 +---- +363 +417 +794 + +statement ok +set datafusion.optimizer.enable_window_limits = true; + +query I +SELECT LEAD(empno, 2) OVER (ORDER BY empno ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM employees LIMIT 3 +---- +363 +417 +794 + +query TT +EXPLAIN +SELECT LEAD(empno, 2) OVER (ORDER BY empno ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM employees LIMIT 3 +---- +logical_plan +01)Projection: lead(employees.empno,Int64(2)) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW +02)--Limit: skip=0, fetch=3 +03)----WindowAggr: windowExpr=[[lead(employees.empno, Int64(2)) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] +04)------TableScan: employees projection=[empno] +physical_plan +01)ProjectionExec: expr=[lead(employees.empno,Int64(2)) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@1 as lead(employees.empno,Int64(2)) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW] +02)--GlobalLimitExec: skip=0, fetch=3 +03)----BoundedWindowAggExec: wdw=[lead(employees.empno,Int64(2)) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "lead(employees.empno,Int64(2)) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] +04)------SortExec: TopK(fetch=5), expr=[empno@0 ASC NULLS LAST], preserve_partitioning=[false] +05)--------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100_with_dates.csv]]}, projection=[empno], file_type=csv, has_header=true + # 2 < 3... nth_value should not grow the limit statement ok set datafusion.optimizer.enable_window_limits = false; @@ -283,9 +320,9 @@ set datafusion.optimizer.enable_window_limits = false; query IRRI SELECT empno, - percent_rank() OVER (ORDER BY empno) AS pr, - cume_dist() OVER (ORDER BY empno) AS cd, - ntile(4) OVER (ORDER BY empno) AS nt + percent_rank() OVER (ORDER BY empno ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS pr, + cume_dist() OVER (ORDER BY empno ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS cd, + ntile(4) OVER (ORDER BY empno ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS nt FROM employees ORDER BY empno LIMIT 5; @@ -302,9 +339,9 @@ set datafusion.optimizer.enable_window_limits = true; query IRRI SELECT empno, - percent_rank() OVER (ORDER BY empno) AS pr, - cume_dist() OVER (ORDER BY empno) AS cd, - ntile(4) OVER (ORDER BY empno) AS nt + percent_rank() OVER (ORDER BY empno ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS pr, + cume_dist() OVER (ORDER BY empno ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS cd, + ntile(4) OVER (ORDER BY empno ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS nt FROM employees ORDER BY empno LIMIT 5; @@ -319,25 +356,90 @@ query TT EXPLAIN SELECT empno, - percent_rank() OVER (ORDER BY empno) AS pr, - cume_dist() OVER (ORDER BY empno) AS cd, - ntile(4) OVER (ORDER BY empno) AS nt + percent_rank() OVER (ORDER BY empno ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS pr, + cume_dist() OVER (ORDER BY empno ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS cd, + ntile(4) OVER (ORDER BY empno ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS nt FROM employees ORDER BY empno LIMIT 5; ---- logical_plan 01)Sort: employees.empno ASC NULLS LAST, fetch=5 -02)--Projection: employees.empno, percent_rank() ORDER BY [employees.empno ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS pr, cume_dist() ORDER BY [employees.empno ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS cd, ntile(Int64(4)) ORDER BY [employees.empno ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS nt -03)----WindowAggr: windowExpr=[[percent_rank() ORDER BY [employees.empno ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, cume_dist() ORDER BY [employees.empno ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, ntile(Int64(4)) ORDER BY [employees.empno ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] +02)--Projection: employees.empno, percent_rank() ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS pr, cume_dist() ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS cd, ntile(Int64(4)) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS nt +03)----WindowAggr: windowExpr=[[percent_rank() ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, cume_dist() ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, ntile(Int64(4)) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] 04)------TableScan: employees projection=[empno] physical_plan -01)ProjectionExec: expr=[empno@0 as empno, percent_rank() ORDER BY [employees.empno ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@1 as pr, cume_dist() ORDER BY [employees.empno ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as cd, ntile(Int64(4)) ORDER BY [employees.empno ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@3 as nt] +01)ProjectionExec: expr=[empno@0 as empno, percent_rank() ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@1 as pr, cume_dist() ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as cd, ntile(Int64(4)) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@3 as nt] 02)--GlobalLimitExec: skip=0, fetch=5 -03)----WindowAggExec: wdw=[percent_rank() ORDER BY [employees.empno ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "percent_rank() ORDER BY [employees.empno ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Float64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: CurrentRow, is_causal: false }, cume_dist() ORDER BY [employees.empno ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "cume_dist() ORDER BY [employees.empno ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Float64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: CurrentRow, is_causal: false }, ntile(Int64(4)) ORDER BY [employees.empno ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "ntile(Int64(4)) ORDER BY [employees.empno ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: CurrentRow, is_causal: false }] +03)----WindowAggExec: wdw=[percent_rank() ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "percent_rank() ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Float64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: true }, cume_dist() ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "cume_dist() ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Float64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: true }, ntile(Int64(4)) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "ntile(Int64(4)) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: true }] 04)------SortExec: expr=[empno@0 ASC NULLS LAST], preserve_partitioning=[false] 05)--------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100_with_dates.csv]]}, projection=[empno], file_type=csv, has_header=true +# Analytical functions that don't lookahead +statement ok +set datafusion.optimizer.enable_window_limits = false; + +query IIIII +SELECT + empno, + first_value(salary) OVER (ORDER BY empno ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS fv, + lag(salary, 1) OVER (ORDER BY empno ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS l1, + last_value(salary) OVER (ORDER BY empno ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS lv, + nth_value(salary, 3) OVER (ORDER BY empno ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS n3 +FROM employees +ORDER BY empno +LIMIT 5; +---- +102 3276123488 NULL 3276123488 NULL +299 3276123488 3276123488 28774375 NULL +363 3276123488 28774375 1865307672 1865307672 +417 3276123488 1865307672 557517119 1865307672 +794 3276123488 557517119 4061635107 1865307672 + +statement ok +set datafusion.optimizer.enable_window_limits = true; + +query IIIII +SELECT + empno, + first_value(salary) OVER (ORDER BY empno ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS fv, + lag(salary, 1) OVER (ORDER BY empno ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS l1, + last_value(salary) OVER (ORDER BY empno ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS lv, + nth_value(salary, 3) OVER (ORDER BY empno ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS n3 +FROM employees +ORDER BY empno +LIMIT 5; +---- +102 3276123488 NULL 3276123488 NULL +299 3276123488 3276123488 28774375 NULL +363 3276123488 28774375 1865307672 1865307672 +417 3276123488 1865307672 557517119 1865307672 +794 3276123488 557517119 4061635107 1865307672 + +query TT +EXPLAIN +SELECT + empno, + first_value(salary) OVER (ORDER BY empno ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS fv, + lag(salary, 1) OVER (ORDER BY empno ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS l1, + last_value(salary) OVER (ORDER BY empno ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS lv, + nth_value(salary, 3) OVER (ORDER BY empno ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS n3 +FROM employees +ORDER BY empno +LIMIT 5; +---- +logical_plan +01)Sort: employees.empno ASC NULLS LAST, fetch=5 +02)--Projection: employees.empno, first_value(employees.salary) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS fv, lag(employees.salary,Int64(1)) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS l1, last_value(employees.salary) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS lv, nth_value(employees.salary,Int64(3)) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS n3 +03)----WindowAggr: windowExpr=[[first_value(employees.salary) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, lag(employees.salary, Int64(1)) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, last_value(employees.salary) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, nth_value(employees.salary, Int64(3)) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] +04)------TableScan: employees projection=[empno, salary] +physical_plan +01)ProjectionExec: expr=[empno@0 as empno, first_value(employees.salary) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as fv, lag(employees.salary,Int64(1)) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@3 as l1, last_value(employees.salary) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@4 as lv, nth_value(employees.salary,Int64(3)) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@5 as n3] +02)--GlobalLimitExec: skip=0, fetch=5 +03)----BoundedWindowAggExec: wdw=[first_value(employees.salary) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "first_value(employees.salary) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, lag(employees.salary,Int64(1)) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "lag(employees.salary,Int64(1)) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, last_value(employees.salary) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "last_value(employees.salary) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, nth_value(employees.salary,Int64(3)) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "nth_value(employees.salary,Int64(3)) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] +04)------SortExec: TopK(fetch=5), expr=[empno@0 ASC NULLS LAST], preserve_partitioning=[false] +05)--------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100_with_dates.csv]]}, projection=[empno, salary], file_type=csv, has_header=true + # should handle partition by unoptimized statement ok set datafusion.optimizer.enable_window_limits = false; From e75c662e8b329259d621cdb839ee5b501716be88 Mon Sep 17 00:00:00 2001 From: Brent Gardner Date: Mon, 13 Oct 2025 15:12:40 -0600 Subject: [PATCH 26/30] Max lead test --- .../sqllogictest/test_files/window_limits.slt | 62 +++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/datafusion/sqllogictest/test_files/window_limits.slt b/datafusion/sqllogictest/test_files/window_limits.slt index 37811c5e3a26..0454ba0e4664 100644 --- a/datafusion/sqllogictest/test_files/window_limits.slt +++ b/datafusion/sqllogictest/test_files/window_limits.slt @@ -112,6 +112,68 @@ physical_plan 04)------SortExec: TopK(fetch=5), expr=[empno@0 ASC NULLS LAST], preserve_partitioning=[false] 05)--------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100_with_dates.csv]]}, projection=[empno], file_type=csv, has_header=true +# Should use the max of leads +statement ok +set datafusion.optimizer.enable_window_limits = false; + +query IIII +SELECT + empno, + LEAD(salary, 1) OVER (ORDER BY empno ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS lead1, + LEAD(salary, 3) OVER (ORDER BY empno ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS lead3, + LEAD(salary, 5) OVER (ORDER BY empno ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS lead5 +FROM employees +ORDER BY empno +LIMIT 5; +---- +102 28774375 557517119 4015442341 +299 1865307672 4061635107 3542840110 +363 557517119 4015442341 1088543984 +417 4061635107 3542840110 1362369177 +794 4015442341 1088543984 145294611 + +statement ok +set datafusion.optimizer.enable_window_limits = true; + +query IIII +SELECT + empno, + LEAD(salary, 1) OVER (ORDER BY empno ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS lead1, + LEAD(salary, 3) OVER (ORDER BY empno ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS lead3, + LEAD(salary, 5) OVER (ORDER BY empno ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS lead5 +FROM employees +ORDER BY empno +LIMIT 5; +---- +102 28774375 557517119 4015442341 +299 1865307672 4061635107 3542840110 +363 557517119 4015442341 1088543984 +417 4061635107 3542840110 1362369177 +794 4015442341 1088543984 145294611 + +query TT +EXPLAIN +SELECT + empno, + LEAD(salary, 1) OVER (ORDER BY empno ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS lead1, + LEAD(salary, 3) OVER (ORDER BY empno ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS lead3, + LEAD(salary, 5) OVER (ORDER BY empno ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS lead5 +FROM employees +ORDER BY empno +LIMIT 5; +---- +logical_plan +01)Sort: employees.empno ASC NULLS LAST, fetch=5 +02)--Projection: employees.empno, lead(employees.salary,Int64(1)) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS lead1, lead(employees.salary,Int64(3)) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS lead3, lead(employees.salary,Int64(5)) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS lead5 +03)----WindowAggr: windowExpr=[[lead(employees.salary, Int64(1)) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, lead(employees.salary, Int64(3)) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, lead(employees.salary, Int64(5)) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] +04)------TableScan: employees projection=[empno, salary] +physical_plan +01)ProjectionExec: expr=[empno@0 as empno, lead(employees.salary,Int64(1)) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as lead1, lead(employees.salary,Int64(3)) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@3 as lead3, lead(employees.salary,Int64(5)) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@4 as lead5] +02)--GlobalLimitExec: skip=0, fetch=5 +03)----BoundedWindowAggExec: wdw=[lead(employees.salary,Int64(1)) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "lead(employees.salary,Int64(1)) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, lead(employees.salary,Int64(3)) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "lead(employees.salary,Int64(3)) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, lead(employees.salary,Int64(5)) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "lead(employees.salary,Int64(5)) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] +04)------SortExec: TopK(fetch=10), expr=[empno@0 ASC NULLS LAST], preserve_partitioning=[false] +05)--------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100_with_dates.csv]]}, projection=[empno, salary], file_type=csv, has_header=true + # 2 < 3... nth_value should not grow the limit statement ok set datafusion.optimizer.enable_window_limits = false; From 4f7ac8cb3127e08b7f93ac5644bed9748477d869 Mon Sep 17 00:00:00 2001 From: Brent Gardner Date: Mon, 13 Oct 2025 16:51:15 -0600 Subject: [PATCH 27/30] More tests, JIC --- .../sqllogictest/test_files/window_limits.slt | 91 +++++++++++++++++++ 1 file changed, 91 insertions(+) diff --git a/datafusion/sqllogictest/test_files/window_limits.slt b/datafusion/sqllogictest/test_files/window_limits.slt index 0454ba0e4664..150db0ab1358 100644 --- a/datafusion/sqllogictest/test_files/window_limits.slt +++ b/datafusion/sqllogictest/test_files/window_limits.slt @@ -593,3 +593,94 @@ physical_plan 06)----------RepartitionExec: partitioning=Hash([depname@0], 4), input_partitions=4 07)------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 08)--------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100_with_dates.csv]]}, projection=[depname, empno, salary], file_type=csv, has_header=true + +# unbounded following +statement ok +set datafusion.optimizer.enable_window_limits = false; + +query I +SELECT LEAD(salary) OVER (ORDER BY empno ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) +FROM employees +LIMIT 5; +---- +28774375 +1865307672 +557517119 +4061635107 +4015442341 + +statement ok +set datafusion.optimizer.enable_window_limits = true; + +query I +SELECT LEAD(salary) OVER (ORDER BY empno ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) +FROM employees +LIMIT 5; +---- +28774375 +1865307672 +557517119 +4061635107 +4015442341 + +# RANGE +statement ok +set datafusion.optimizer.enable_window_limits = false; + +query I +SELECT LEAD(salary) OVER (ORDER BY empno RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) +FROM employees +LIMIT 5; +---- +28774375 +1865307672 +557517119 +4061635107 +4015442341 + +statement ok +set datafusion.optimizer.enable_window_limits = true; + +query I +SELECT LEAD(salary) OVER (ORDER BY empno RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) +FROM employees +LIMIT 5; +---- +28774375 +1865307672 +557517119 +4061635107 +4015442341 + +# multiple windows +statement ok +set datafusion.optimizer.enable_window_limits = false; + +query II +SELECT + LEAD(salary, 1) OVER (ORDER BY empno ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW), + LEAD(salary, 5) OVER (ORDER BY empno ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) +FROM employees +LIMIT 5; +---- +28774375 4015442341 +1865307672 3542840110 +557517119 1088543984 +4061635107 1362369177 +4015442341 145294611 + +statement ok +set datafusion.optimizer.enable_window_limits = true; + +query II +SELECT + LEAD(salary, 1) OVER (ORDER BY empno ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW), + LEAD(salary, 5) OVER (ORDER BY empno ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) +FROM employees +LIMIT 5; +---- +28774375 4015442341 +1865307672 3542840110 +557517119 1088543984 +4061635107 1362369177 +4015442341 145294611 From afc67d88933d59bdd4a4482ccf2f620adbf88fc6 Mon Sep 17 00:00:00 2001 From: Brent Gardner Date: Mon, 13 Oct 2025 17:11:06 -0600 Subject: [PATCH 28/30] More tests, JIC --- .../sqllogictest/test_files/window_limits.slt | 83 +++++++++++++++++++ 1 file changed, 83 insertions(+) diff --git a/datafusion/sqllogictest/test_files/window_limits.slt b/datafusion/sqllogictest/test_files/window_limits.slt index 150db0ab1358..c1e680084f4b 100644 --- a/datafusion/sqllogictest/test_files/window_limits.slt +++ b/datafusion/sqllogictest/test_files/window_limits.slt @@ -684,3 +684,86 @@ LIMIT 5; 557517119 1088543984 4061635107 1362369177 4015442341 145294611 + +# sliding +statement ok +set datafusion.optimizer.enable_window_limits = false; + +query III +SELECT + empno, + salary, + SUM(salary) OVER (ORDER BY empno ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) AS sliding_sum +FROM employees +LIMIT 3; +---- +102 3276123488 3276123488 +299 28774375 3304897863 +363 1865307672 5170205535 + +statement ok +set datafusion.optimizer.enable_window_limits = true; + +query III +SELECT + empno, + salary, + SUM(salary) OVER (ORDER BY empno ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) AS sliding_sum +FROM employees +LIMIT 3; +---- +102 3276123488 3276123488 +299 28774375 3304897863 +363 1865307672 5170205535 + +# sliding lead +statement ok +set datafusion.optimizer.enable_window_limits = false; + +query III +SELECT + empno, + salary, + LEAD(salary, 2) OVER (ORDER BY empno ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS lead2 +FROM employees +LIMIT 3; +---- +102 3276123488 1865307672 +299 28774375 557517119 +363 1865307672 4061635107 + +statement ok +set datafusion.optimizer.enable_window_limits = true; + +query III +SELECT + empno, + salary, + LEAD(salary, 2) OVER (ORDER BY empno ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS lead2 +FROM employees +LIMIT 3; +---- +102 3276123488 1865307672 +299 28774375 557517119 +363 1865307672 4061635107 + +query TT +EXPLAIN +SELECT + empno, + salary, + LEAD(salary, 2) OVER (ORDER BY empno ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS lead2 +FROM employees +LIMIT 3; +---- +logical_plan +01)Projection: employees.empno, employees.salary, lead(employees.salary,Int64(2)) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS lead2 +02)--Limit: skip=0, fetch=3 +03)----WindowAggr: windowExpr=[[lead(employees.salary, Int64(2)) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] +04)------TableScan: employees projection=[empno, salary] +physical_plan +01)ProjectionExec: expr=[empno@0 as empno, salary@1 as salary, lead(employees.salary,Int64(2)) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as lead2] +02)--GlobalLimitExec: skip=0, fetch=3 +03)----BoundedWindowAggExec: wdw=[lead(employees.salary,Int64(2)) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Field { name: "lead(employees.salary,Int64(2)) ORDER BY [employees.empno ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] +04)------SortExec: TopK(fetch=5), expr=[empno@0 ASC NULLS LAST], preserve_partitioning=[false] +05)--------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100_with_dates.csv]]}, projection=[empno, salary], file_type=csv, has_header=true From dc553df0b23e3c5ed911be89c36d648979c57791 Mon Sep 17 00:00:00 2001 From: Brent Gardner Date: Mon, 13 Oct 2025 18:03:49 -0600 Subject: [PATCH 29/30] Notes --- datafusion/physical-optimizer/src/limit_pushdown_past_window.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/datafusion/physical-optimizer/src/limit_pushdown_past_window.rs b/datafusion/physical-optimizer/src/limit_pushdown_past_window.rs index 1c671cd07488..fe3e63379773 100644 --- a/datafusion/physical-optimizer/src/limit_pushdown_past_window.rs +++ b/datafusion/physical-optimizer/src/limit_pushdown_past_window.rs @@ -167,6 +167,7 @@ fn grow_limit(window: &BoundedWindowAggExec, ctx: &mut TraverseState) -> bool { // grow based on frames let frame = expr.get_window_frame(); + // TODO https://github.com/apache/datafusion/pull/17347#discussion_r2310673423 if frame.units != WindowFrameUnits::Rows { return false; // expression-based limits not statically evaluatable } From 15c479c6e0c957d4c818d7e282ee87b9e124cc85 Mon Sep 17 00:00:00 2001 From: Brent Gardner Date: Mon, 13 Oct 2025 18:16:10 -0600 Subject: [PATCH 30/30] Notes-- --- datafusion/physical-optimizer/src/limit_pushdown_past_window.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/datafusion/physical-optimizer/src/limit_pushdown_past_window.rs b/datafusion/physical-optimizer/src/limit_pushdown_past_window.rs index fe3e63379773..1c671cd07488 100644 --- a/datafusion/physical-optimizer/src/limit_pushdown_past_window.rs +++ b/datafusion/physical-optimizer/src/limit_pushdown_past_window.rs @@ -167,7 +167,6 @@ fn grow_limit(window: &BoundedWindowAggExec, ctx: &mut TraverseState) -> bool { // grow based on frames let frame = expr.get_window_frame(); - // TODO https://github.com/apache/datafusion/pull/17347#discussion_r2310673423 if frame.units != WindowFrameUnits::Rows { return false; // expression-based limits not statically evaluatable }