From 1cac6707edc75b0fd5a349e552738a64f2f070b2 Mon Sep 17 00:00:00 2001
From: Andrew Lamb
Date: Fri, 3 Oct 2025 10:17:52 -0400
Subject: [PATCH 01/14] Refactor: Update enforce_sorting tests to use insta
 snapshots for easier updates

---
 .../physical_optimizer/enforce_sorting.rs | 590 ++++++++++++++++----
 1 file changed, 453 insertions(+), 137 deletions(-)

diff --git a/datafusion/core/tests/physical_optimizer/enforce_sorting.rs b/datafusion/core/tests/physical_optimizer/enforce_sorting.rs
index 3858e70eaf3e6..74c56196d4181 100644
--- a/datafusion/core/tests/physical_optimizer/enforce_sorting.rs
+++ b/datafusion/core/tests/physical_optimizer/enforce_sorting.rs
@@ -70,6 +70,7 @@ use datafusion_execution::TaskContext;
 use datafusion_catalog::streaming::StreamingTable;
 use futures::StreamExt;
+use insta::assert_snapshot;
 use rstest::rstest;
 
 /// Create a sorted Csv exec
@@ -91,6 +92,138 @@ fn csv_exec_sorted(
     DataSourceExec::from_data_source(config)
 }
 
+/// Runs the sort enforcement optimizer and asserts the plan
+/// against the original and expected plans
+struct EnforceSortingTest {
+    plan: Arc<dyn ExecutionPlan>,
+    repartition_sorts: bool,
+    /// If true, asserts that the input and optimized plans are the same
+    expect_no_change: bool,
+    /// A message printed into the snapshot to describe the expected output
+    expected_description: Option<String>,
+}
+
+impl EnforceSortingTest {
+    fn new(plan: Arc<dyn ExecutionPlan>) -> Self {
+        Self {
+            plan,
+            repartition_sorts: false,
+            expect_no_change: false,
+            expected_description: None,
+        }
+    }
+
+    /// Set whether to repartition sorts
+    fn with_repartition_sorts(mut self, repartition_sorts: bool) -> Self {
+        self.repartition_sorts = repartition_sorts;
+        self
+    }
+
+    /// Set whether to expect no change in the plan
+    fn with_expect_no_change(mut self, expect_no_change: bool) -> Self {
+        self.expect_no_change = expect_no_change;
+        self
+    }
+
+    /// Add an expected output description
+    fn with_expected_description(mut self, description: &str) -> Self {
+        self.expected_description = Some(format!("{description}\n"));
+        self
+    }
+
+    /// Runs the enforce sorting test and returns a string with the input and
+    /// optimized plan as strings for snapshot comparison using insta
+    fn run(&self) -> String {
+        let mut config = ConfigOptions::new();
+        config.optimizer.repartition_sorts = self.repartition_sorts;
+
+        // This file has 4 rules that use tree node, apply these rules as in the
+        // EnforceSorting::optimize implementation.
+        // After these operations tree nodes should be in a consistent state.
+        // This code block makes sure that these rules don't violate tree node
+        // integrity.
+        {
+            let plan_requirements =
+                PlanWithCorrespondingSort::new_default(Arc::clone(&self.plan));
+            let adjusted = plan_requirements
+                .transform_up(ensure_sorting)
+                .data()
+                .and_then(check_integrity)
+                .expect("check_integrity failed after ensure_sorting");
+            // TODO: End state payloads will be checked here.
+
+            let new_plan = if config.optimizer.repartition_sorts {
+                let plan_with_coalesce_partitions =
+                    PlanWithCorrespondingCoalescePartitions::new_default(adjusted.plan);
+                let parallel = plan_with_coalesce_partitions
+                    .transform_up(parallelize_sorts)
+                    .data()
+                    .and_then(check_integrity)
+                    .expect("check_integrity failed after parallelize_sorts");
+                // TODO: End state payloads will be checked here.
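+                // `parallelize_sorts` can replace a single-partition `SortExec`
+                // sitting on a `CoalescePartitionsExec` with per-partition
+                // sorts merged by a `SortPreservingMergeExec`; the
+                // `repartition_sorts = true` snapshots below show this rewrite.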
+ parallel.plan + } else { + adjusted.plan + }; + + let plan_with_pipeline_fixer = + OrderPreservationContext::new_default(new_plan); + let updated_plan = plan_with_pipeline_fixer + .transform_up(|plan_with_pipeline_fixer| { + replace_with_order_preserving_variants( + plan_with_pipeline_fixer, + false, + true, + &config, + ) + }) + .data() + .and_then(check_integrity) + .expect( + "check_integrity failed after replace_with_order_preserving_variants", + ); + // TODO: End state payloads will be checked here. + + let mut sort_pushdown = SortPushDown::new_default(updated_plan.plan); + assign_initial_requirements(&mut sort_pushdown); + check_integrity( + pushdown_sorts(sort_pushdown).expect("pushdown_sorts failed"), + ) + .expect("check_integrity failed after pushdown_sorts"); + // TODO: End state payloads will be checked here. + } + let input_plan_string = displayable(self.plan.as_ref()).indent(true).to_string(); + + // Run the actual optimizer + let optimized_physical_plan = EnforceSorting::new() + .optimize(Arc::clone(&self.plan), &config) + .expect("enforce_sorting failed"); + + // Get string representation of the plan + let optimized_plan_string = displayable(optimized_physical_plan.as_ref()) + .indent(true) + .to_string(); + + let expected_input: Vec<&str> = input_plan_string.trim().lines().collect(); + let expected_optimized: Vec<&str> = + optimized_plan_string.trim().lines().collect(); + + if self.expect_no_change { + assert_eq!(expected_input, expected_optimized, + "Expected no change in the plan, but the optimized plan differs from the input plan:\n\n\ + Input Plan:\n{expected_input:#?}\n\nOptimized Plan:\n{expected_optimized:#?}\n" + ); + } + + let expected_description = self.expected_description.as_deref().unwrap_or(""); + + // return a string with both input and optimized plan + format!( + "let expected_input = {expected_input:#?};\n\ + {expected_description}let expected_optimized = {expected_optimized:#?};", + ) + } +} + /// Runs the sort enforcement optimizer and asserts the plan /// against the original and expected plans /// @@ -193,6 +326,8 @@ async fn test_remove_unnecessary_sort5() -> Result<()> { let join = hash_join_exec(left_input, right_input, on, None, &JoinType::Inner)?; let physical_plan = sort_exec([sort_expr("a", &join.schema())].into(), join); + let test = EnforceSortingTest::new(physical_plan).with_repartition_sorts(true); + assert_snapshot!(test.run(), @r#" let expected_input = [ "SortExec: expr=[a@2 ASC], preserve_partitioning=[false]", " HashJoinExec: mode=Partitioned, join_type=Inner, on=[(col_a@0, c@2)]", @@ -204,8 +339,7 @@ async fn test_remove_unnecessary_sort5() -> Result<()> { " DataSourceExec: partitions=1, partition_sizes=[0]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], file_type=parquet", ]; - assert_optimized!(expected_input, expected_optimized, physical_plan, true); - + "#); Ok(()) } @@ -226,6 +360,10 @@ async fn test_do_not_remove_sort_with_limit() -> Result<()> { let repartition = repartition_exec(union); let physical_plan = sort_preserving_merge_exec(ordering, repartition); + let test = EnforceSortingTest::new(physical_plan) + .with_repartition_sorts(true) + .with_expected_description("// We should keep the bottom `SortExec`."); + assert_snapshot!(test.run(), @r#" let expected_input = [ "SortPreservingMergeExec: [nullable_col@0 ASC, non_nullable_col@1 ASC]", " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=2", @@ -246,7 +384,7 @@ async fn 
test_do_not_remove_sort_with_limit() -> Result<()> { " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", ]; - assert_optimized!(expected_input, expected_optimized, physical_plan, true); + "#); Ok(()) } @@ -262,6 +400,12 @@ async fn test_union_inputs_sorted() -> Result<()> { let physical_plan = sort_preserving_merge_exec(ordering, union); // one input to the union is already sorted, one is not. + let test = EnforceSortingTest::new(physical_plan) + .with_repartition_sorts(true) + .with_expected_description("// should not add a sort at the output of the union, input plan should not be changed") + .with_expect_no_change(true); + + assert_snapshot!(test.run(), @r#" let expected_input = [ "SortPreservingMergeExec: [nullable_col@0 ASC]", " UnionExec", @@ -270,7 +414,14 @@ async fn test_union_inputs_sorted() -> Result<()> { " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", ]; // should not add a sort at the output of the union, input plan should not be changed - assert_optimized!(expected_input, expected_input, physical_plan, true); + let expected_optimized = [ + "SortPreservingMergeExec: [nullable_col@0 ASC]", + " UnionExec", + " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC], file_type=parquet", + " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", + " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", + ]; + "#); Ok(()) } @@ -291,6 +442,11 @@ async fn test_union_inputs_different_sorted() -> Result<()> { let physical_plan = sort_preserving_merge_exec(ordering, union); // one input to the union is already sorted, one is not. + let test = EnforceSortingTest::new(physical_plan) + .with_repartition_sorts(true) + .with_expected_description("// should not add a sort at the output of the union, input plan should not be changed") + .with_expect_no_change(true); + assert_snapshot!(test.run(), @r#" let expected_input = [ "SortPreservingMergeExec: [nullable_col@0 ASC]", " UnionExec", @@ -299,7 +455,14 @@ async fn test_union_inputs_different_sorted() -> Result<()> { " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", ]; // should not add a sort at the output of the union, input plan should not be changed - assert_optimized!(expected_input, expected_input, physical_plan, true); + let expected_optimized = [ + "SortPreservingMergeExec: [nullable_col@0 ASC]", + " UnionExec", + " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC, non_nullable_col@1 ASC], file_type=parquet", + " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", + " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", + ]; + "#); Ok(()) } @@ -322,6 +485,8 @@ async fn test_union_inputs_different_sorted2() -> Result<()> { // Input is an invalid plan. In this case rule should add required sorting in appropriate places. // First DataSourceExec has output ordering(nullable_col@0 ASC). However, it doesn't satisfy the // required ordering of SortPreservingMergeExec. 
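+    // The `EnforceSortingTest` harness (defined above) replaces the old
+    // `assert_optimized!` macro: `run()` renders the input plan and the plan
+    // produced by `EnforceSorting` into one string, so a single inline
+    // `assert_snapshot!` shows the whole transformation and can be refreshed
+    // with `cargo insta review` (assuming the `cargo-insta` tool is installed).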
+ let test = EnforceSortingTest::new(physical_plan).with_repartition_sorts(true); + assert_snapshot!(test.run(), @r#" let expected_input = [ "SortPreservingMergeExec: [nullable_col@0 ASC, non_nullable_col@1 ASC]", " UnionExec", @@ -337,7 +502,7 @@ async fn test_union_inputs_different_sorted2() -> Result<()> { " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", ]; - assert_optimized!(expected_input, expected_optimized, physical_plan, true); + "#); Ok(()) } @@ -362,6 +527,12 @@ async fn test_union_inputs_different_sorted3() -> Result<()> { // First input to the union is not Sorted (SortExec is finer than required ordering by the SortPreservingMergeExec above). // Second input to the union is already Sorted (matches with the required ordering by the SortPreservingMergeExec above). // Third input to the union is not Sorted (SortExec is matches required ordering by the SortPreservingMergeExec above). + let test = EnforceSortingTest::new(physical_plan).with_repartition_sorts(true) + .with_expected_description( + "// should adjust sorting in the first input of the union such that it is not unnecessarily fine" + ); + + assert_snapshot!(test.run(), @r#" let expected_input = [ "SortPreservingMergeExec: [nullable_col@0 ASC]", " UnionExec", @@ -381,8 +552,7 @@ async fn test_union_inputs_different_sorted3() -> Result<()> { " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", ]; - assert_optimized!(expected_input, expected_optimized, physical_plan, true); - + "#); Ok(()) } @@ -406,6 +576,8 @@ async fn test_union_inputs_different_sorted4() -> Result<()> { // Should modify the plan to ensure that all three inputs to the // `UnionExec` satisfy the ordering, OR add a single sort after // the `UnionExec` (both of which are equally good for this example). + let test = EnforceSortingTest::new(physical_plan).with_repartition_sorts(true); + assert_snapshot!(test.run(), @r#" let expected_input = [ "SortPreservingMergeExec: [nullable_col@0 ASC, non_nullable_col@1 ASC]", " UnionExec", @@ -425,7 +597,7 @@ async fn test_union_inputs_different_sorted4() -> Result<()> { " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", ]; - assert_optimized!(expected_input, expected_optimized, physical_plan, true); + "#); Ok(()) } @@ -460,6 +632,8 @@ async fn test_union_inputs_different_sorted5() -> Result<()> { // The `UnionExec` doesn't preserve any of the inputs ordering in the // example below. However, we should be able to change the unnecessarily // fine `SortExec`s below with required `SortExec`s that are absolutely necessary. 
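+    // (`UnionExec` only claims an output ordering that every one of its
+    // inputs satisfies, which is why none of the finer per-input orderings
+    // survive here.)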
+ let test = EnforceSortingTest::new(physical_plan).with_repartition_sorts(true); + assert_snapshot!(test.run(), @r#" let expected_input = [ "SortPreservingMergeExec: [nullable_col@0 ASC]", " UnionExec", @@ -476,7 +650,7 @@ async fn test_union_inputs_different_sorted5() -> Result<()> { " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", ]; - assert_optimized!(expected_input, expected_optimized, physical_plan, true); + "#); Ok(()) } @@ -505,6 +679,13 @@ async fn test_union_inputs_different_sorted6() -> Result<()> { // At the same time, this ordering requirement is unnecessarily fine. // The final plan should be valid AND the ordering of the third child // shouldn't be finer than necessary. + let test = EnforceSortingTest::new(physical_plan) + .with_repartition_sorts(true) + .with_expected_description( + "// Should adjust the requirement in the third input of the union so\n\ + // that it is not unnecessarily fine.", + ); + assert_snapshot!(test.run(), @r#" let expected_input = [ "SortPreservingMergeExec: [nullable_col@0 ASC]", " UnionExec", @@ -527,7 +708,7 @@ async fn test_union_inputs_different_sorted6() -> Result<()> { " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", ]; - assert_optimized!(expected_input, expected_optimized, physical_plan, true); + "#); Ok(()) } @@ -548,6 +729,9 @@ async fn test_union_inputs_different_sorted7() -> Result<()> { let physical_plan = sort_preserving_merge_exec(ordering2, union); // Union has unnecessarily fine ordering below it. We should be able to replace them with absolutely necessary ordering. + let test = EnforceSortingTest::new(physical_plan).with_repartition_sorts(true) + .with_expected_description("// Union preserves the inputs ordering and we should not change any of the SortExecs under UnionExec"); + assert_snapshot!(test.run(), @r#" let expected_input = [ "SortPreservingMergeExec: [nullable_col@0 ASC]", " UnionExec", @@ -557,7 +741,7 @@ async fn test_union_inputs_different_sorted7() -> Result<()> { " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", ]; // Union preserves the inputs ordering and we should not change any of the SortExecs under UnionExec - let expected_output = [ + let expected_optimized = [ "SortPreservingMergeExec: [nullable_col@0 ASC]", " UnionExec", " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", @@ -565,7 +749,7 @@ async fn test_union_inputs_different_sorted7() -> Result<()> { " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", ]; - assert_optimized!(expected_input, expected_output, physical_plan, true); + "#); Ok(()) } @@ -604,6 +788,13 @@ async fn test_union_inputs_different_sorted8() -> Result<()> { // The `UnionExec` doesn't preserve any of the inputs ordering in the // example below. 
+ let test = EnforceSortingTest::new(physical_plan) + .with_repartition_sorts(true) + .with_expected_description( + "// Since `UnionExec` doesn't preserve ordering in the plan above.\n\ + // We shouldn't keep SortExecs in the plan.", + ); + assert_snapshot!(test.run(), @r#" let expected_input = [ "UnionExec", " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]", @@ -618,7 +809,7 @@ async fn test_union_inputs_different_sorted8() -> Result<()> { " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", ]; - assert_optimized!(expected_input, expected_optimized, physical_plan, true); + "#); Ok(()) } @@ -641,22 +832,24 @@ async fn test_soft_hard_requirements_remove_soft_requirement() -> Result<()> { let physical_plan = bounded_window_exec_with_partition("nullable_col", vec![], partition_bys, sort); + let test = EnforceSortingTest::new(physical_plan).with_repartition_sorts(true); + assert_snapshot!(test.run(), @r#" let expected_input = [ "BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", " SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", ]; - // TODO When sort pushdown respects to the alternatives, and removes soft SortExecs this should be changed - // let expected_optimized = [ - // "BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Linear]", - // " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", - // ]; let expected_optimized = [ "BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", " SortExec: expr=[nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", ]; - assert_optimized!(expected_input, expected_optimized, physical_plan, true); + "#); + // TODO When sort pushdown respects to the alternatives, and removes soft SortExecs this should be changed + // let expected_optimized = [ + // "BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Linear]", + // " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", + // ]; Ok(()) } @@ -688,25 +881,27 @@ async fn test_soft_hard_requirements_remove_soft_requirement_without_pushdowns( bounded_window_exec_with_partition("nullable_col", vec![], partition_bys, sort); let physical_plan = projection_exec(proj_exprs, bounded_window)?; + let test = 
EnforceSortingTest::new(physical_plan).with_repartition_sorts(true); + assert_snapshot!(test.run(), @r#" let expected_input = [ "ProjectionExec: expr=[nullable_col@0 + non_nullable_col@1 as count]", " BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", " SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", ]; - // TODO When sort pushdown respects to the alternatives, and removes soft SortExecs this should be changed - // let expected_optimized = [ - // "ProjectionExec: expr=[nullable_col@0 + non_nullable_col@1 as count]", - // " BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Linear]", - // " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", - // ]; let expected_optimized = [ "ProjectionExec: expr=[nullable_col@0 + non_nullable_col@1 as count]", " BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", " SortExec: expr=[nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", ]; - assert_optimized!(expected_input, expected_optimized, physical_plan, true); + "#); + // TODO When sort pushdown respects to the alternatives, and removes soft SortExecs this should be changed + // let expected_optimized = [ + // "ProjectionExec: expr=[nullable_col@0 + non_nullable_col@1 as count]", + // " BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Linear]", + // " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", + // ]; let ordering = [sort_expr_options( "nullable_col", @@ -735,18 +930,14 @@ async fn test_soft_hard_requirements_remove_soft_requirement_without_pushdowns( projection, ); + let test = EnforceSortingTest::new(physical_plan).with_repartition_sorts(true); + assert_snapshot!(test.run(), @r#" let expected_input = [ "BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", " ProjectionExec: expr=[nullable_col@0 + non_nullable_col@1 as nullable_col]", " SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", ]; - // TODO When sort pushdown respects to the alternatives, and removes soft SortExecs this should be changed - // let expected_optimized = [ - // "BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 
0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Linear]", - // " ProjectionExec: expr=[nullable_col@0 + non_nullable_col@1 as nullable_col]", - // " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", - // ]; let expected_optimized = [ "BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", " SortExec: expr=[nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]", @@ -754,7 +945,13 @@ async fn test_soft_hard_requirements_remove_soft_requirement_without_pushdowns( " SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", ]; - assert_optimized!(expected_input, expected_optimized, physical_plan, true); + "#); + // TODO When sort pushdown respects to the alternatives, and removes soft SortExecs this should be changed + // let expected_optimized = [ + // "BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Linear]", + // " ProjectionExec: expr=[nullable_col@0 + non_nullable_col@1 as nullable_col]", + // " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", + // ]; Ok(()) } @@ -795,6 +992,8 @@ async fn test_soft_hard_requirements_multiple_soft_requirements() -> Result<()> bounded_window, ); + let test = EnforceSortingTest::new(physical_plan).with_repartition_sorts(true); + assert_snapshot!(test.run(), @r#" let expected_input = [ "BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", " BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", @@ -802,13 +1001,6 @@ async fn test_soft_hard_requirements_multiple_soft_requirements() -> Result<()> " SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", ]; - // TODO When sort pushdown respects to the alternatives, and removes soft SortExecs this should be changed - // let expected_optimized = [ - // "BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Linear]", - // " BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Linear]", - // " ProjectionExec: expr=[nullable_col@0 + non_nullable_col@1 as 
nullable_col]", - // " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", - // ]; let expected_optimized = [ "BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", " BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", @@ -817,7 +1009,14 @@ async fn test_soft_hard_requirements_multiple_soft_requirements() -> Result<()> " SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", ]; - assert_optimized!(expected_input, expected_optimized, physical_plan, true); + "#); + // TODO When sort pushdown respects to the alternatives, and removes soft SortExecs this should be changed + // let expected_optimized = [ + // "BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Linear]", + // " BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Linear]", + // " ProjectionExec: expr=[nullable_col@0 + non_nullable_col@1 as nullable_col]", + // " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", + // ]; let ordering = [sort_expr_options( "nullable_col", @@ -859,7 +1058,8 @@ async fn test_soft_hard_requirements_multiple_soft_requirements() -> Result<()> let sort3 = sort_exec(ordering2, sort2); let physical_plan = bounded_window_exec_with_partition("count", vec![], partition_bys, sort3); - + let test = EnforceSortingTest::new(physical_plan).with_repartition_sorts(true); + assert_snapshot!(test.run(), @r#" let expected_input = [ "BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", " SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]", @@ -869,13 +1069,6 @@ async fn test_soft_hard_requirements_multiple_soft_requirements() -> Result<()> " SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", ]; - // TODO When sort pushdown respects to the alternatives, and removes soft SortExecs this should be changed - // let expected_optimized = [ - // "BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Linear]", - // " BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), 
frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Linear]", - // " ProjectionExec: expr=[nullable_col@0 + non_nullable_col@1 as nullable_col]", - // " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", - // ]; let expected_optimized = [ "BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", " BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", @@ -884,7 +1077,14 @@ async fn test_soft_hard_requirements_multiple_soft_requirements() -> Result<()> " SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", ]; - assert_optimized!(expected_input, expected_optimized, physical_plan, true); + "#); + // TODO When sort pushdown respects to the alternatives, and removes soft SortExecs this should be changed + // let expected_optimized = [ + // "BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Linear]", + // " BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Linear]", + // " ProjectionExec: expr=[nullable_col@0 + non_nullable_col@1 as nullable_col]", + // " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", + // ]; Ok(()) } @@ -929,20 +1129,16 @@ async fn test_soft_hard_requirements_multiple_sorts() -> Result<()> { .into(); let sort2 = sort_exec(ordering2.clone(), bounded_window); let physical_plan = sort_exec(ordering2, sort2); - + let test = EnforceSortingTest::new(physical_plan).with_repartition_sorts(true); + assert_snapshot!(test.run(), @r#" let expected_input = [ "SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]", " SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", " ProjectionExec: expr=[nullable_col@0 + non_nullable_col@1 as nullable_col]", + " BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", + " ProjectionExec: expr=[nullable_col@0 + non_nullable_col@1 as nullable_col]", " SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", ]; - // TODO When sort pushdown respects to the alternatives, and removes soft SortExecs 
this should be changed - // let expected_optimized = [ - // "BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Linear]", - // " ProjectionExec: expr=[nullable_col@0 + non_nullable_col@1 as nullable_col]", - // " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", - // ]; let expected_optimized = [ "SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]", " BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", @@ -951,7 +1147,13 @@ async fn test_soft_hard_requirements_multiple_sorts() -> Result<()> { " SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", ]; - assert_optimized!(expected_input, expected_optimized, physical_plan, true); + "#); + // TODO When sort pushdown respects to the alternatives, and removes soft SortExecs this should be changed + // let expected_optimized = [ + // "BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Linear]", + // " ProjectionExec: expr=[nullable_col@0 + non_nullable_col@1 as nullable_col]", + // " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", + // ]; Ok(()) } @@ -991,7 +1193,8 @@ async fn test_soft_hard_requirements_with_multiple_soft_requirements_and_output_ Distribution::SinglePartition, None, )); - + let test = EnforceSortingTest::new(physical_plan).with_repartition_sorts(true); + assert_snapshot!(test.run(), @r#" let expected_input = [ "OutputRequirementExec: order_by=[(non_nullable_col@1, asc)], dist_by=SinglePartition", " BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", @@ -999,14 +1202,6 @@ async fn test_soft_hard_requirements_with_multiple_soft_requirements_and_output_ " SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", ]; - // TODO When sort pushdown respects to the alternatives, and removes soft SortExecs this should be changed - // let expected_optimized = [ - // "OutputRequirementExec", - // " BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", - // " SortExec: expr=[non_nullable_col@1 ASC NULLS LAST], preserve_partitioning=[false]", - // " BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, 
start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Linear]", - // " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", - // ]; let expected_optimized = [ "OutputRequirementExec: order_by=[(non_nullable_col@1, asc)], dist_by=SinglePartition", " BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", @@ -1015,7 +1210,15 @@ async fn test_soft_hard_requirements_with_multiple_soft_requirements_and_output_ " SortExec: expr=[nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", ]; - assert_optimized!(expected_input, expected_optimized, physical_plan, true); + "#); + // TODO When sort pushdown respects to the alternatives, and removes soft SortExecs this should be changed + // let expected_optimized = [ + // "OutputRequirementExec", + // " BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]", + // " SortExec: expr=[non_nullable_col@1 ASC NULLS LAST], preserve_partitioning=[false]", + // " BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Linear]", + // " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", + // ]; Ok(()) } @@ -1050,6 +1253,8 @@ async fn test_window_multi_path_sort() -> Result<()> { // During the removal of `SortExec`s, it should be able to remove the // corresponding SortExecs together. Also, the inputs of these `SortExec`s // are not necessarily the same to be able to remove them. + let test = EnforceSortingTest::new(physical_plan).with_repartition_sorts(true); + assert_snapshot!(test.run(), @r#" let expected_input = [ "BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", " SortPreservingMergeExec: [nullable_col@0 DESC NULLS LAST]", @@ -1066,7 +1271,7 @@ async fn test_window_multi_path_sort() -> Result<()> { " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC, non_nullable_col@1 ASC], file_type=parquet", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC], file_type=parquet", ]; - assert_optimized!(expected_input, expected_optimized, physical_plan, true); + "#); Ok(()) } @@ -1090,6 +1295,8 @@ async fn test_window_multi_path_sort2() -> Result<()> { // The `WindowAggExec` can get its required sorting from the leaf nodes directly. 
// The unnecessary SortExecs should be removed + let test = EnforceSortingTest::new(physical_plan).with_repartition_sorts(true); + assert_snapshot!(test.run(), @r#" let expected_input = [ "BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", " SortPreservingMergeExec: [nullable_col@0 ASC, non_nullable_col@1 ASC]", @@ -1106,7 +1313,7 @@ async fn test_window_multi_path_sort2() -> Result<()> { " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC], file_type=parquet", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC], file_type=parquet", ]; - assert_optimized!(expected_input, expected_optimized, physical_plan, true); + "#); Ok(()) } @@ -1140,7 +1347,9 @@ async fn test_union_inputs_different_sorted_with_limit() -> Result<()> { let ordering3 = [sort_expr("nullable_col", &schema)].into(); let physical_plan = sort_preserving_merge_exec(ordering3, union); + let test = EnforceSortingTest::new(physical_plan).with_repartition_sorts(true); // Should not change the unnecessarily fine `SortExec`s because there is `LimitExec` + assert_snapshot!(test.run(), @r#" let expected_input = [ "SortPreservingMergeExec: [nullable_col@0 ASC]", " UnionExec", @@ -1161,7 +1370,7 @@ async fn test_union_inputs_different_sorted_with_limit() -> Result<()> { " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 DESC NULLS LAST], preserve_partitioning=[false]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", ]; - assert_optimized!(expected_input, expected_optimized, physical_plan, true); + "#); Ok(()) } @@ -1336,6 +1545,12 @@ async fn test_sort_merge_join_complex_order_by() -> Result<()> { .into(); let physical_plan = sort_preserving_merge_exec(ordering, join.clone()); + let test = EnforceSortingTest::new(physical_plan) + .with_repartition_sorts(true) + .with_expected_description( + "// can not push down the sort requirements, need to add SortExec", + ); + assert_snapshot!(test.run(), @r#" let expected_input = [ "SortPreservingMergeExec: [col_b@3 ASC, col_a@2 ASC]", " SortMergeJoin: join_type=Inner, on=[(nullable_col@0, col_a@0)]", @@ -1351,7 +1566,7 @@ async fn test_sort_merge_join_complex_order_by() -> Result<()> { " SortExec: expr=[col_a@0 ASC], preserve_partitioning=[false]", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[col_a, col_b], file_type=parquet", ]; - assert_optimized!(expected_input, expected_optimized, physical_plan, true); + "#); // order by (nullable_col, col_b, col_a) let ordering2 = [ @@ -1361,7 +1576,12 @@ async fn test_sort_merge_join_complex_order_by() -> Result<()> { ] .into(); let physical_plan = sort_preserving_merge_exec(ordering2, join); - + let test = EnforceSortingTest::new(physical_plan) + .with_repartition_sorts(true) + .with_expected_description( + "// Can push down the sort requirements since col_a = nullable_col", + ); + assert_snapshot!(test.run(), @r#" let expected_input = [ "SortPreservingMergeExec: [nullable_col@0 ASC, col_b@3 ASC, col_a@2 ASC]", " SortMergeJoin: join_type=Inner, on=[(nullable_col@0, col_a@0)]", @@ -1376,7 +1596,7 @@ async fn test_sort_merge_join_complex_order_by() -> Result<()> { " SortExec: expr=[col_a@0 ASC, col_b@1 ASC], preserve_partitioning=[false]", " DataSourceExec: 
file_groups={1 group: [[x]]}, projection=[col_a, col_b], file_type=parquet", ]; - assert_optimized!(expected_input, expected_optimized, physical_plan, true); + "#); Ok(()) } @@ -1398,6 +1618,8 @@ async fn test_multilayer_coalesce_partitions() -> Result<()> { // CoalescePartitionsExec and SortExec are not directly consecutive. In this case // we should be able to parallelize Sorting also (given that executors in between don't require) // single partition. + let test = EnforceSortingTest::new(physical_plan).with_repartition_sorts(true); + assert_snapshot!(test.run(), @r#" let expected_input = [ "SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", " FilterExec: NOT non_nullable_col@1", @@ -1412,7 +1634,7 @@ async fn test_multilayer_coalesce_partitions() -> Result<()> { " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", ]; - assert_optimized!(expected_input, expected_optimized, physical_plan, true); + "#); Ok(()) } @@ -1430,6 +1652,8 @@ async fn test_with_lost_ordering_bounded() -> Result<()> { let coalesce_partitions = coalesce_partitions_exec(repartition_hash); let physical_plan = sort_exec([sort_expr("a", &schema)].into(), coalesce_partitions); + let test = EnforceSortingTest::new(physical_plan).with_repartition_sorts(true); + assert_snapshot!(test.run(), @r#" let expected_input = [ "SortExec: expr=[a@0 ASC], preserve_partitioning=[false]", " CoalescePartitionsExec", @@ -1444,8 +1668,7 @@ async fn test_with_lost_ordering_bounded() -> Result<()> { " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], file_type=csv, has_header=false", ]; - assert_optimized!(expected_input, expected_optimized, physical_plan, true); - + "#); Ok(()) } @@ -1548,6 +1771,9 @@ async fn test_do_not_pushdown_through_spm() -> Result<()> { let spm = sort_preserving_merge_exec(sort_exprs.into(), repartition_rr); let physical_plan = sort_exec([sort_expr("b", &schema)].into(), spm); + let test = + EnforceSortingTest::new(physical_plan.clone()).with_repartition_sorts(true); + assert_snapshot!(test.run(), @r#" let expected_input = [ "SortExec: expr=[b@1 ASC], preserve_partitioning=[false]", " SortPreservingMergeExec: [a@0 ASC, b@1 ASC]", @@ -1560,7 +1786,7 @@ async fn test_do_not_pushdown_through_spm() -> Result<()> { " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC, b@1 ASC], file_type=csv, has_header=false", ]; - assert_optimized!(expected_input, expected_optimized, physical_plan, false); + "#); Ok(()) } @@ -1581,20 +1807,22 @@ async fn test_pushdown_through_spm() -> Result<()> { .into(), spm, ); - + let test = + EnforceSortingTest::new(physical_plan.clone()).with_repartition_sorts(true); + assert_snapshot!(test.run(), @r#" let expected_input = [ "SortExec: expr=[a@0 ASC, b@1 ASC, c@2 ASC], preserve_partitioning=[false]", " SortPreservingMergeExec: [a@0 ASC, b@1 ASC]", " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC, b@1 ASC], file_type=csv, has_header=false", ]; - let expected_optimized = ["SortPreservingMergeExec: [a@0 ASC, b@1 ASC]", + let expected_optimized = [ + "SortPreservingMergeExec: [a@0 ASC, 
b@1 ASC]", " SortExec: expr=[a@0 ASC, b@1 ASC, c@2 ASC], preserve_partitioning=[true]", " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC, b@1 ASC], file_type=csv, has_header=false", ]; - assert_optimized!(expected_input, expected_optimized, physical_plan, false); - + "#); Ok(()) } @@ -1609,6 +1837,9 @@ async fn test_window_multi_layer_requirement() -> Result<()> { let spm = sort_preserving_merge_exec(sort_exprs.clone().into(), repartition); let physical_plan = bounded_window_exec("a", sort_exprs, spm); + let test = + EnforceSortingTest::new(physical_plan.clone()).with_repartition_sorts(true); + assert_snapshot!(test.run(), @r#" let expected_input = [ "BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", " SortPreservingMergeExec: [a@0 ASC, b@1 ASC]", @@ -1619,13 +1850,13 @@ async fn test_window_multi_layer_requirement() -> Result<()> { ]; let expected_optimized = [ "BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", - " SortExec: expr=[a@0 ASC, b@1 ASC], preserve_partitioning=[false]", - " CoalescePartitionsExec", + " SortPreservingMergeExec: [a@0 ASC, b@1 ASC]", + " SortExec: expr=[a@0 ASC, b@1 ASC], preserve_partitioning=[true]", " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=10", " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false", ]; - assert_optimized!(expected_input, expected_optimized, physical_plan, false); + "#); Ok(()) } @@ -1643,12 +1874,20 @@ async fn test_not_replaced_with_partial_sort_for_bounded_input() -> Result<()> { .into(), parquet_input, ); + let test = EnforceSortingTest::new(physical_plan.clone()) + .with_repartition_sorts(false) + .with_expect_no_change(true); + assert_snapshot!(test.run(), @r#" let expected_input = [ "SortExec: expr=[a@0 ASC, b@1 ASC, c@2 ASC], preserve_partitioning=[false]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[b@1 ASC, c@2 ASC], file_type=parquet" + " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[b@1 ASC, c@2 ASC], file_type=parquet", ]; - let expected_no_change = expected_input; - assert_optimized!(expected_input, expected_no_change, physical_plan, false); + let expected_optimized = [ + "SortExec: expr=[a@0 ASC, b@1 ASC, c@2 ASC], preserve_partitioning=[false]", + " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[b@1 ASC, c@2 ASC], file_type=parquet", + ]; + "#); + Ok(()) } @@ -1747,6 +1986,9 @@ async fn test_remove_unnecessary_sort() -> Result<()> { let input = sort_exec([sort_expr("non_nullable_col", &schema)].into(), source); let physical_plan = sort_exec([sort_expr("nullable_col", &schema)].into(), input); + let test = + EnforceSortingTest::new(physical_plan.clone()).with_repartition_sorts(true); + assert_snapshot!(test.run(), @r#" let expected_input = [ "SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", " SortExec: expr=[non_nullable_col@1 ASC], 
preserve_partitioning=[false]", @@ -1756,7 +1998,7 @@ async fn test_remove_unnecessary_sort() -> Result<()> { "SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", " DataSourceExec: partitions=1, partition_sizes=[0]", ]; - assert_optimized!(expected_input, expected_optimized, physical_plan, true); + "#); Ok(()) } @@ -1795,6 +2037,9 @@ async fn test_remove_unnecessary_sort_window_multilayer() -> Result<()> { ); let physical_plan = bounded_window_exec("non_nullable_col", ordering2, filter); + let test = + EnforceSortingTest::new(physical_plan.clone()).with_repartition_sorts(true); + assert_snapshot!(test.run(), @r#" let expected_input = [ "BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", " FilterExec: NOT non_nullable_col@1", @@ -1802,18 +2047,17 @@ async fn test_remove_unnecessary_sort_window_multilayer() -> Result<()> { " BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", " CoalesceBatchesExec: target_batch_size=128", " SortExec: expr=[non_nullable_col@1 DESC], preserve_partitioning=[false]", - " DataSourceExec: partitions=1, partition_sizes=[0]" + " DataSourceExec: partitions=1, partition_sizes=[0]", ]; - let expected_optimized = [ "WindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }]", " FilterExec: NOT non_nullable_col@1", " BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", " CoalesceBatchesExec: target_batch_size=128", " SortExec: expr=[non_nullable_col@1 DESC], preserve_partitioning=[false]", - " DataSourceExec: partitions=1, partition_sizes=[0]" + " DataSourceExec: partitions=1, partition_sizes=[0]", ]; - assert_optimized!(expected_input, expected_optimized, physical_plan, true); + "#); Ok(()) } @@ -1825,6 +2069,9 @@ async fn test_add_required_sort() -> Result<()> { let ordering = [sort_expr("nullable_col", &schema)].into(); let physical_plan = sort_preserving_merge_exec(ordering, source); + let test = + EnforceSortingTest::new(physical_plan.clone()).with_repartition_sorts(true); + assert_snapshot!(test.run(), @r#" let expected_input = [ "SortPreservingMergeExec: [nullable_col@0 ASC]", " DataSourceExec: partitions=1, partition_sizes=[0]", @@ -1833,7 +2080,7 @@ async fn test_add_required_sort() -> Result<()> { "SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", " DataSourceExec: partitions=1, partition_sizes=[0]", ]; - assert_optimized!(expected_input, expected_optimized, physical_plan, true); + "#); Ok(()) } @@ -1848,6 +2095,9 @@ async fn test_remove_unnecessary_sort1() -> Result<()> { let sort = sort_exec(ordering.clone(), spm); let physical_plan = sort_preserving_merge_exec(ordering, sort); + let test = + EnforceSortingTest::new(physical_plan.clone()).with_repartition_sorts(true); + assert_snapshot!(test.run(), @r#" let expected_input = [ "SortPreservingMergeExec: [nullable_col@0 ASC]", " SortExec: expr=[nullable_col@0 ASC], 
preserve_partitioning=[false]", @@ -1859,7 +2109,7 @@ async fn test_remove_unnecessary_sort1() -> Result<()> { "SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", " DataSourceExec: partitions=1, partition_sizes=[0]", ]; - assert_optimized!(expected_input, expected_optimized, physical_plan, true); + "#); Ok(()) } @@ -1882,6 +2132,9 @@ async fn test_remove_unnecessary_sort2() -> Result<()> { let sort3 = sort_exec(ordering3, spm2); let physical_plan = repartition_exec(repartition_exec(sort3)); + let test = + EnforceSortingTest::new(physical_plan.clone()).with_repartition_sorts(true); + assert_snapshot!(test.run(), @r#" let expected_input = [ "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=10", " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", @@ -1897,7 +2150,7 @@ async fn test_remove_unnecessary_sort2() -> Result<()> { " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", " DataSourceExec: partitions=1, partition_sizes=[0]", ]; - assert_optimized!(expected_input, expected_optimized, physical_plan, true); + "#); Ok(()) } @@ -1925,6 +2178,9 @@ async fn test_remove_unnecessary_sort3() -> Result<()> { // When removing a `SortPreservingMergeExec`, make sure that partitioning // requirements are not violated. In some cases, we may need to replace // it with a `CoalescePartitionsExec` instead of directly removing it. + let test = + EnforceSortingTest::new(physical_plan.clone()).with_repartition_sorts(true); + assert_snapshot!(test.run(), @r#" let expected_input = [ "AggregateExec: mode=Final, gby=[], aggr=[]", " SortPreservingMergeExec: [nullable_col@0 ASC, non_nullable_col@1 ASC]", @@ -1940,7 +2196,7 @@ async fn test_remove_unnecessary_sort3() -> Result<()> { " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", " DataSourceExec: partitions=1, partition_sizes=[0]", ]; - assert_optimized!(expected_input, expected_optimized, physical_plan, true); + "#); Ok(()) } @@ -1970,6 +2226,9 @@ async fn test_remove_unnecessary_sort4() -> Result<()> { // When removing a `SortPreservingMergeExec`, make sure that partitioning // requirements are not violated. In some cases, we may need to replace // it with a `CoalescePartitionsExec` instead of directly removing it. 
+ let test = + EnforceSortingTest::new(physical_plan.clone()).with_repartition_sorts(true); + assert_snapshot!(test.run(), @r#" let expected_input = [ "SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]", " FilterExec: NOT non_nullable_col@1", @@ -1991,7 +2250,7 @@ async fn test_remove_unnecessary_sort4() -> Result<()> { " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", " DataSourceExec: partitions=1, partition_sizes=[0]", ]; - assert_optimized!(expected_input, expected_optimized, physical_plan, true); + "#); Ok(()) } @@ -2013,7 +2272,9 @@ async fn test_remove_unnecessary_sort6() -> Result<()> { .into(), input, ); - + let test = + EnforceSortingTest::new(physical_plan.clone()).with_repartition_sorts(true); + assert_snapshot!(test.run(), @r#" let expected_input = [ "SortExec: expr=[non_nullable_col@1 ASC, nullable_col@0 ASC], preserve_partitioning=[false]", " SortExec: TopK(fetch=2), expr=[non_nullable_col@1 ASC], preserve_partitioning=[false]", @@ -2023,7 +2284,7 @@ async fn test_remove_unnecessary_sort6() -> Result<()> { "SortExec: TopK(fetch=2), expr=[non_nullable_col@1 ASC, nullable_col@0 ASC], preserve_partitioning=[false]", " DataSourceExec: partitions=1, partition_sizes=[0]", ]; - assert_optimized!(expected_input, expected_optimized, physical_plan, true); + "#); Ok(()) } @@ -2046,6 +2307,9 @@ async fn test_remove_unnecessary_sort7() -> Result<()> { input, ); + let test = + EnforceSortingTest::new(physical_plan.clone()).with_repartition_sorts(true); + assert_snapshot!(test.run(), @r#" let expected_input = [ "SortExec: TopK(fetch=2), expr=[non_nullable_col@1 ASC], preserve_partitioning=[false], sort_prefix=[non_nullable_col@1 ASC]", " SortExec: expr=[non_nullable_col@1 ASC, nullable_col@0 ASC], preserve_partitioning=[false]", @@ -2056,7 +2320,7 @@ async fn test_remove_unnecessary_sort7() -> Result<()> { " SortExec: expr=[non_nullable_col@1 ASC, nullable_col@0 ASC], preserve_partitioning=[false]", " DataSourceExec: partitions=1, partition_sizes=[0]", ]; - assert_optimized!(expected_input, expected_optimized, physical_plan, true); + "#); Ok(()) } @@ -2076,6 +2340,9 @@ async fn test_remove_unnecessary_sort8() -> Result<()> { limit, ); + let test = + EnforceSortingTest::new(physical_plan.clone()).with_repartition_sorts(true); + assert_snapshot!(test.run(), @r#" let expected_input = [ "SortExec: expr=[non_nullable_col@1 ASC, nullable_col@0 ASC], preserve_partitioning=[false]", " LocalLimitExec: fetch=2", @@ -2087,7 +2354,7 @@ async fn test_remove_unnecessary_sort8() -> Result<()> { " SortExec: TopK(fetch=2), expr=[non_nullable_col@1 ASC, nullable_col@0 ASC], preserve_partitioning=[false]", " DataSourceExec: partitions=1, partition_sizes=[0]", ]; - assert_optimized!(expected_input, expected_optimized, physical_plan, true); + "#); Ok(()) } @@ -2100,6 +2367,9 @@ async fn test_do_not_pushdown_through_limit() -> Result<()> { let limit = Arc::new(GlobalLimitExec::new(input, 0, Some(5))) as _; let physical_plan = sort_exec([sort_expr("nullable_col", &schema)].into(), limit); + let test = + EnforceSortingTest::new(physical_plan.clone()).with_repartition_sorts(true); + assert_snapshot!(test.run(), @r#" let expected_input = [ "SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", " GlobalLimitExec: skip=0, fetch=5", @@ -2112,7 +2382,7 @@ async fn test_do_not_pushdown_through_limit() -> Result<()> { " SortExec: expr=[non_nullable_col@1 ASC], preserve_partitioning=[false]", " DataSourceExec: partitions=1, 
partition_sizes=[0]", ]; - assert_optimized!(expected_input, expected_optimized, physical_plan, true); + "#); Ok(()) } @@ -2127,6 +2397,9 @@ async fn test_remove_unnecessary_spm1() -> Result<()> { let physical_plan = sort_preserving_merge_exec([sort_expr("nullable_col", &schema)].into(), input2); + let test = + EnforceSortingTest::new(physical_plan.clone()).with_repartition_sorts(true); + assert_snapshot!(test.run(), @r#" let expected_input = [ "SortPreservingMergeExec: [nullable_col@0 ASC]", " SortPreservingMergeExec: [non_nullable_col@1 ASC]", @@ -2137,7 +2410,7 @@ async fn test_remove_unnecessary_spm1() -> Result<()> { "SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", " DataSourceExec: partitions=1, partition_sizes=[0]", ]; - assert_optimized!(expected_input, expected_optimized, physical_plan, true); + "#); Ok(()) } @@ -2152,6 +2425,8 @@ async fn test_remove_unnecessary_spm2() -> Result<()> { 100, ); + let test = EnforceSortingTest::new(input.clone()).with_repartition_sorts(true); + assert_snapshot!(test.run(), @r#" let expected_input = [ "SortPreservingMergeExec: [non_nullable_col@1 ASC], fetch=100", " DataSourceExec: partitions=1, partition_sizes=[0]", @@ -2161,7 +2436,7 @@ async fn test_remove_unnecessary_spm2() -> Result<()> { " SortExec: expr=[non_nullable_col@1 ASC], preserve_partitioning=[false]", " DataSourceExec: partitions=1, partition_sizes=[0]", ]; - assert_optimized!(expected_input, expected_optimized, input, true); + "#); Ok(()) } @@ -2177,6 +2452,9 @@ async fn test_change_wrong_sorting() -> Result<()> { let sort = sort_exec([sort_exprs[0].clone()].into(), source); let physical_plan = sort_preserving_merge_exec(sort_exprs.into(), sort); + let test = + EnforceSortingTest::new(physical_plan.clone()).with_repartition_sorts(true); + assert_snapshot!(test.run(), @r#" let expected_input = [ "SortPreservingMergeExec: [nullable_col@0 ASC, non_nullable_col@1 ASC]", " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", @@ -2186,7 +2464,7 @@ async fn test_change_wrong_sorting() -> Result<()> { "SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]", " DataSourceExec: partitions=1, partition_sizes=[0]", ]; - assert_optimized!(expected_input, expected_optimized, physical_plan, true); + "#); Ok(()) } @@ -2202,7 +2480,9 @@ async fn test_change_wrong_sorting2() -> Result<()> { let spm1 = sort_preserving_merge_exec(sort_exprs.clone().into(), source); let sort2 = sort_exec([sort_exprs[0].clone()].into(), spm1); let physical_plan = sort_preserving_merge_exec([sort_exprs[1].clone()].into(), sort2); - + let test = + EnforceSortingTest::new(physical_plan.clone()).with_repartition_sorts(true); + assert_snapshot!(test.run(), @r#" let expected_input = [ "SortPreservingMergeExec: [non_nullable_col@1 ASC]", " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", @@ -2213,7 +2493,7 @@ async fn test_change_wrong_sorting2() -> Result<()> { "SortExec: expr=[non_nullable_col@1 ASC], preserve_partitioning=[false]", " DataSourceExec: partitions=1, partition_sizes=[0]", ]; - assert_optimized!(expected_input, expected_optimized, physical_plan, true); + "#); Ok(()) } @@ -2232,6 +2512,9 @@ async fn test_multiple_sort_window_exec() -> Result<()> { let window_agg2 = bounded_window_exec("non_nullable_col", ordering2, window_agg1); let physical_plan = bounded_window_exec("non_nullable_col", ordering1, window_agg2); + let test = + EnforceSortingTest::new(physical_plan.clone()).with_repartition_sorts(true); + 
assert_snapshot!(test.run(), @r#" let expected_input = [ "BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", " BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", @@ -2247,7 +2530,7 @@ async fn test_multiple_sort_window_exec() -> Result<()> { " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", " DataSourceExec: partitions=1, partition_sizes=[0]", ]; - assert_optimized!(expected_input, expected_optimized, physical_plan, true); + "#); Ok(()) } @@ -2266,17 +2549,12 @@ async fn test_commutativity() -> Result<()> { let repartition = repartition_exec(window); let orig_plan = sort_exec(sort_exprs.into(), repartition); - let actual = get_plan_string(&orig_plan); - let expected_input = vec![ - "SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", - " DataSourceExec: partitions=1, partition_sizes=[0]", - ]; - assert_eq!( - expected_input, actual, - "\n**Original Plan Mismatch\n\nexpected:\n\n{expected_input:#?}\nactual:\n\n{actual:#?}\n\n" - ); + assert_snapshot!(displayable(orig_plan.as_ref()).indent(true), @r#" + SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false] + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: partitions=1, partition_sizes=[0] + "#); let config = ConfigOptions::new(); let rules = vec![ @@ -2320,6 +2598,9 @@ async fn test_coalesce_propagate() -> Result<()> { let physical_plan = sort.clone(); // Sort Parallelize rule should end Coalesce + Sort linkage when Sort is Global Sort // Also input plan is not valid as it is. We need to add SortExec before SortPreservingMergeExec. 
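 // Roughly, the rewrite looks like this (hypothetical fragments, not this
 // test's exact plans):
 //   SortExec (global)                      SortPreservingMergeExec
 //     CoalescePartitionsExec        =>       SortExec: preserve_partitioning=[true]
 //       RepartitionExec(...)                   RepartitionExec(...)
 // i.e. each partition is sorted in parallel and the results merged, instead
 // of coalescing to one partition and sorting there.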
+ let test = + EnforceSortingTest::new(physical_plan.clone()).with_repartition_sorts(true); + assert_snapshot!(test.run(), @r#" let expected_input = [ "SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", " SortPreservingMergeExec: [nullable_col@0 ASC]", @@ -2335,7 +2616,7 @@ async fn test_coalesce_propagate() -> Result<()> { " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", " DataSourceExec: partitions=1, partition_sizes=[0]", ]; - assert_optimized!(expected_input, expected_optimized, physical_plan, true); + "#); Ok(()) } @@ -2354,17 +2635,19 @@ async fn test_replace_with_partial_sort2() -> Result<()> { .into(), unbounded_input, ); - + let test = + EnforceSortingTest::new(physical_plan.clone()).with_repartition_sorts(true); + assert_snapshot!(test.run(), @r#" let expected_input = [ "SortExec: expr=[a@0 ASC, c@2 ASC, d@3 ASC], preserve_partitioning=[false]", - " StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[a@0 ASC, c@2 ASC]" + " StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[a@0 ASC, c@2 ASC]", ]; - // let optimized let expected_optimized = [ "PartialSortExec: expr=[a@0 ASC, c@2 ASC, d@3 ASC], common_prefix_length=[2]", " StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[a@0 ASC, c@2 ASC]", ]; - assert_optimized!(expected_input, expected_optimized, physical_plan, true); + "#); + Ok(()) } @@ -2380,15 +2663,24 @@ async fn test_push_with_required_input_ordering_prohibited() -> Result<()> { .with_maintains_input_order(true) .into_arc(); let plan = sort_exec(ordering_b, plan); - + let test = EnforceSortingTest::new(plan.clone()) + .with_repartition_sorts(true) + // should not be able to push sorts + .with_expect_no_change(true); + assert_snapshot!(test.run(), @r#" let expected_input = [ - "SortExec: expr=[b@1 ASC], preserve_partitioning=[false]", // <-- can't push this down - " RequiredInputOrderingExec", // <-- this requires input sorted by a, and preserves the input order + "SortExec: expr=[b@1 ASC], preserve_partitioning=[false]", + " RequiredInputOrderingExec", " SortExec: expr=[a@0 ASC], preserve_partitioning=[false]", " DataSourceExec: partitions=1, partition_sizes=[0]", ]; - // should not be able to push shorts - assert_optimized!(expected_input, expected_input, plan, true); + let expected_optimized = [ + "SortExec: expr=[b@1 ASC], preserve_partitioning=[false]", + " RequiredInputOrderingExec", + " SortExec: expr=[a@0 ASC], preserve_partitioning=[false]", + " DataSourceExec: partitions=1, partition_sizes=[0]", + ]; + "#); Ok(()) } @@ -2406,19 +2698,31 @@ async fn test_push_with_required_input_ordering_allowed() -> Result<()> { .into_arc(); let plan = sort_exec(ordering_ab, plan); + /* let expected_input = [ "SortExec: expr=[a@0 ASC, b@1 ASC], preserve_partitioning=[false]", // <-- can push this down (as it is compatible with the required input ordering) " RequiredInputOrderingExec", // <-- this requires input sorted by a, and preserves the input order " SortExec: expr=[a@0 ASC], preserve_partitioning=[false]", " DataSourceExec: partitions=1, partition_sizes=[0]", ]; + */ + let test = EnforceSortingTest::new(plan.clone()) + .with_repartition_sorts(true) + .with_expected_description("// Should be able to push down"); + assert_snapshot!(test.run(), @r#" + let expected_input = [ + "SortExec: expr=[a@0 ASC, b@1 ASC], preserve_partitioning=[false]", + " RequiredInputOrderingExec",
+ " SortExec: expr=[a@0 ASC], preserve_partitioning=[false]", + " DataSourceExec: partitions=1, partition_sizes=[0]", + ]; // Should be able to push down let expected_optimized = [ "RequiredInputOrderingExec", " SortExec: expr=[a@0 ASC, b@1 ASC], preserve_partitioning=[false]", " DataSourceExec: partitions=1, partition_sizes=[0]", ]; - assert_optimized!(expected_input, expected_optimized, plan, true); + "#); Ok(()) } @@ -2432,15 +2736,18 @@ async fn test_replace_with_partial_sort() -> Result<()> { unbounded_input, ); + let test = + EnforceSortingTest::new(physical_plan.clone()).with_repartition_sorts(true); + assert_snapshot!(test.run(), @r#" let expected_input = [ "SortExec: expr=[a@0 ASC, c@2 ASC], preserve_partitioning=[false]", - " StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[a@0 ASC]" + " StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[a@0 ASC]", ]; let expected_optimized = [ "PartialSortExec: expr=[a@0 ASC, c@2 ASC], common_prefix_length=[1]", " StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[a@0 ASC]", ]; - assert_optimized!(expected_input, expected_optimized, physical_plan, true); + "#); Ok(()) } @@ -2458,13 +2765,22 @@ async fn test_not_replaced_with_partial_sort_for_unbounded_input() -> Result<()> .into(), unbounded_input, ); + let test = EnforceSortingTest::new(physical_plan.clone()) + .with_repartition_sorts(true) + .with_expect_no_change(true); + assert_snapshot!(test.run(), @r#" let expected_input = [ "SortExec: expr=[a@0 ASC, b@1 ASC, c@2 ASC], preserve_partitioning=[false]", - " StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[b@1 ASC, c@2 ASC]" + " StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[b@1 ASC, c@2 ASC]", + ]; + let expected_optimized = [ + "SortExec: expr=[a@0 ASC, b@1 ASC, c@2 ASC], preserve_partitioning=[false]", + " StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[b@1 ASC, c@2 ASC]", ]; - assert_optimized!(expected_input, expected_input, physical_plan, true); + "#); Ok(()) } #[tokio::test] async fn test_window_partial_constant_and_set_monotonicity() -> Result<()> { From d3a30c6c6d49bcd2c6ca26c055e68a61fdddfcc6 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 3 Oct 2025 11:44:52 -0400 Subject: [PATCH 02/14] port some more --- .../physical_optimizer/enforce_sorting.rs | 35 +++++++++++-------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/datafusion/core/tests/physical_optimizer/enforce_sorting.rs b/datafusion/core/tests/physical_optimizer/enforce_sorting.rs index 74c56196d4181..4160bbaf956e2 100644 --- a/datafusion/core/tests/physical_optimizer/enforce_sorting.rs +++ b/datafusion/core/tests/physical_optimizer/enforce_sorting.rs @@ -4058,18 +4058,20 @@ fn test_removes_unused_orthogonal_sort() -> Result<()> { let output_sort = sort_exec(input_ordering, orthogonal_sort); // same sort as data source // Test scenario/input has an orthogonal sort: + let test = EnforceSortingTest::new(output_sort).with_repartition_sorts(true) + .with_expected_description("// Test: should remove orthogonal sort, and the uppermost (unneeded) sort:"); + + assert_snapshot!(test.run(), @r#" let expected_input = [ "SortExec: expr=[b@1 ASC, c@2 ASC], preserve_partitioning=[false]", " SortExec: expr=[a@0 ASC],
preserve_partitioning=[false]", - " StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[b@1 ASC, c@2 ASC]" + " StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[b@1 ASC, c@2 ASC]", ]; - assert_eq!(get_plan_string(&output_sort), expected_input); - // Test: should remove orthogonal sort, and the uppermost (unneeded) sort: let expected_optimized = [ - "StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[b@1 ASC, c@2 ASC]" + "StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[b@1 ASC, c@2 ASC]", ]; - assert_optimized!(expected_input, expected_optimized, output_sort, true); + "#); Ok(()) } @@ -4085,16 +4087,21 @@ fn test_keeps_used_orthogonal_sort() -> Result<()> { let output_sort = sort_exec(input_ordering, orthogonal_sort); // Test scenario/input has an orthogonal sort: + let test = EnforceSortingTest::new(output_sort).with_repartition_sorts(true) + .with_expected_description("// Test: should keep the orthogonal sort, since it modifies the output:"); + assert_snapshot!(test.run(), @r#" let expected_input = [ "SortExec: expr=[b@1 ASC, c@2 ASC], preserve_partitioning=[false]", " SortExec: TopK(fetch=3), expr=[a@0 ASC], preserve_partitioning=[false]", - " StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[b@1 ASC, c@2 ASC]" + " StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[b@1 ASC, c@2 ASC]", ]; - assert_eq!(get_plan_string(&output_sort), expected_input); - // Test: should keep the orthogonal sort, since it modifies the output: - let expected_optimized = expected_input; - assert_optimized!(expected_input, expected_optimized, output_sort, true); + let expected_optimized = [ + "SortExec: expr=[b@1 ASC, c@2 ASC], preserve_partitioning=[false]", + " SortExec: TopK(fetch=3), expr=[a@0 ASC], preserve_partitioning=[false]", + " StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[b@1 ASC, c@2 ASC]", + ]; + "#); Ok(()) } @@ -4115,6 +4122,9 @@ fn test_handles_multiple_orthogonal_sorts() -> Result<()> { let output_sort = sort_exec(input_ordering, orthogonal_sort_3); // final sort // Test scenario/input has an orthogonal sort: + let test = EnforceSortingTest::new(output_sort.clone()).with_repartition_sorts(true) + .with_expected_description("// Test: should keep only the needed orthogonal sort, and remove the unneeded ones:"); + assert_snapshot!(test.run(), @r#" let expected_input = [ "SortExec: expr=[b@1 ASC, c@2 ASC], preserve_partitioning=[false]", " SortExec: expr=[a@0 ASC], preserve_partitioning=[false]", @@ -4123,16 +4133,13 @@ fn test_handles_multiple_orthogonal_sorts() -> Result<()> { " SortExec: expr=[c@2 ASC], preserve_partitioning=[false]", " StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[b@1 ASC, c@2 ASC]", ]; - assert_eq!(get_plan_string(&output_sort), expected_input); - // Test: should keep only the needed orthogonal sort, and remove the unneeded ones: let expected_optimized = [ "SortExec: expr=[b@1 ASC, c@2 ASC], preserve_partitioning=[false]", " SortExec: TopK(fetch=3), expr=[a@0 ASC], preserve_partitioning=[false]", " StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[b@1 ASC, c@2 ASC]", ]; - 
assert_optimized!(expected_input, expected_optimized, output_sort, true); - + "#); Ok(()) } From c0a6fb1f5486e1e87b6540c15c72ab62f3d6484f Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 3 Oct 2025 11:45:14 -0400 Subject: [PATCH 03/14] fmt --- .../tests/physical_optimizer/enforce_sorting.rs | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/datafusion/core/tests/physical_optimizer/enforce_sorting.rs b/datafusion/core/tests/physical_optimizer/enforce_sorting.rs index 4160bbaf956e2..8455bfa886801 100644 --- a/datafusion/core/tests/physical_optimizer/enforce_sorting.rs +++ b/datafusion/core/tests/physical_optimizer/enforce_sorting.rs @@ -4058,8 +4058,11 @@ fn test_removes_unused_orthogonal_sort() -> Result<()> { let output_sort = sort_exec(input_ordering, orthogonal_sort); // same sort as data source // Test scenario/input has an orthogonal sort: - let test = EnforceSortingTest::new(output_sort).with_repartition_sorts(true) - .with_expected_description("// Test: should remove orthogonal sort, and the uppermost (unneeded) sort:"); + let test = EnforceSortingTest::new(output_sort) + .with_repartition_sorts(true) + .with_expected_description( + "// Test: should remove orthogonal sort, and the uppermost (unneeded) sort:", + ); assert_snapshot!(test.run(), @r#" let expected_input = [ @@ -4087,8 +4090,11 @@ fn test_keeps_used_orthogonal_sort() -> Result<()> { let output_sort = sort_exec(input_ordering, orthogonal_sort); // Test scenario/input has an orthogonal sort: - let test = EnforceSortingTest::new(output_sort).with_repartition_sorts(true) - .with_expected_description("// Test: should keep the orthogonal sort, since it modifies the output:"); + let test = EnforceSortingTest::new(output_sort) + .with_repartition_sorts(true) + .with_expected_description( + "// Test: should keep the orthogonal sort, since it modifies the output:", + ); assert_snapshot!(test.run(), @r#" let expected_input = [ "SortExec: expr=[b@1 ASC, c@2 ASC], preserve_partitioning=[false]", From 8e33dd4b73788cf5acca7f4814cee346191ebab9 Mon Sep 17 00:00:00 2001 From: blaginin Date: Sat, 4 Oct 2025 18:53:26 +0100 Subject: [PATCH 04/14] Cleanup representation --- .../physical_optimizer/enforce_sorting.rs | 1536 ++++++++--------- 1 file changed, 728 insertions(+), 808 deletions(-) diff --git a/datafusion/core/tests/physical_optimizer/enforce_sorting.rs b/datafusion/core/tests/physical_optimizer/enforce_sorting.rs index 8455bfa886801..424e79f1ed12d 100644 --- a/datafusion/core/tests/physical_optimizer/enforce_sorting.rs +++ b/datafusion/core/tests/physical_optimizer/enforce_sorting.rs @@ -127,7 +127,7 @@ impl EnforceSortingTest { /// Add an expected output description fn with_expected_description(mut self, description: &str) -> Self { - self.expected_description = Some(format!("{description}\n")); + self.expected_description = Some(description.to_string()); self } @@ -203,24 +203,26 @@ impl EnforceSortingTest { .indent(true) .to_string(); - let expected_input: Vec<&str> = input_plan_string.trim().lines().collect(); - let expected_optimized: Vec<&str> = - optimized_plan_string.trim().lines().collect(); + let expected_description = + if let Some(desc) = self.expected_description.as_deref() { + format!("{desc}\n") + } else { + "".to_string() + }; if self.expect_no_change { - assert_eq!(expected_input, expected_optimized, - "Expected no change in the plan, but the optimized plan differs from the input plan:\n\n\ - Input Plan:\n{expected_input:#?}\n\nOptimized Plan:\n{expected_optimized:#?}\n" + 
assert_eq!(input_plan_string, optimized_plan_string, + "Expected no change in the plan, but the optimized plan differs from the input plan" ); - } - let expected_description = self.expected_description.as_deref().unwrap_or(""); + return format!( + "{expected_description}Input / Optimized Plan:\n{input_plan_string}", + ); + } - // return a string with both input and optimized plan format!( - "let expected_input = {expected_input:#?};\n\ - {expected_description}let expected_optimized = {expected_optimized:#?};", - ) + "Input Plan:\n{input_plan_string}\n{expected_description}Optimized Plan:\n{optimized_plan_string}", + ) } } @@ -327,19 +329,18 @@ async fn test_remove_unnecessary_sort5() -> Result<()> { let physical_plan = sort_exec([sort_expr("a", &join.schema())].into(), join); let test = EnforceSortingTest::new(physical_plan).with_repartition_sorts(true); - assert_snapshot!(test.run(), @r#" - let expected_input = [ - "SortExec: expr=[a@2 ASC], preserve_partitioning=[false]", - " HashJoinExec: mode=Partitioned, join_type=Inner, on=[(col_a@0, c@2)]", - " DataSourceExec: partitions=1, partition_sizes=[0]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], file_type=parquet", - ]; - let expected_optimized = [ - "HashJoinExec: mode=Partitioned, join_type=Inner, on=[(col_a@0, c@2)]", - " DataSourceExec: partitions=1, partition_sizes=[0]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], file_type=parquet", - ]; - "#); + assert_snapshot!(test.run(), @r" + Input Plan: + SortExec: expr=[a@2 ASC], preserve_partitioning=[false] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(col_a@0, c@2)] + DataSourceExec: partitions=1, partition_sizes=[0] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], file_type=parquet + + Optimized Plan: + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(col_a@0, c@2)] + DataSourceExec: partitions=1, partition_sizes=[0] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], file_type=parquet + "); Ok(()) } @@ -363,28 +364,27 @@ async fn test_do_not_remove_sort_with_limit() -> Result<()> { let test = EnforceSortingTest::new(physical_plan) .with_repartition_sorts(true) .with_expected_description("// We should keep the bottom `SortExec`."); - assert_snapshot!(test.run(), @r#" - let expected_input = [ - "SortPreservingMergeExec: [nullable_col@0 ASC, non_nullable_col@1 ASC]", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=2", - " UnionExec", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC], file_type=parquet", - " LocalLimitExec: fetch=100", - " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", - ]; + assert_snapshot!(test.run(), @r" + Input Plan: + SortPreservingMergeExec: [nullable_col@0 ASC, non_nullable_col@1 ASC] + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=2 + UnionExec + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC], file_type=parquet + LocalLimitExec: fetch=100 + SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false] + DataSourceExec: file_groups={1 
group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet + // We should keep the bottom `SortExec`. - let expected_optimized = [ - "SortPreservingMergeExec: [nullable_col@0 ASC, non_nullable_col@1 ASC]", - " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[true]", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=2", - " UnionExec", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC], file_type=parquet", - " LocalLimitExec: fetch=100", - " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", - ]; - "#); + Optimized Plan: + SortPreservingMergeExec: [nullable_col@0 ASC, non_nullable_col@1 ASC] + SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[true] + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=2 + UnionExec + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC], file_type=parquet + LocalLimitExec: fetch=100 + SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet + "); Ok(()) } @@ -405,23 +405,15 @@ async fn test_union_inputs_sorted() -> Result<()> { .with_expected_description("// should not add a sort at the output of the union, input plan should not be changed") .with_expect_no_change(true); - assert_snapshot!(test.run(), @r#" - let expected_input = [ - "SortPreservingMergeExec: [nullable_col@0 ASC]", - " UnionExec", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC], file_type=parquet", - " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", - ]; + assert_snapshot!(test.run(), @r" // should not add a sort at the output of the union, input plan should not be changed - let expected_optimized = [ - "SortPreservingMergeExec: [nullable_col@0 ASC]", - " UnionExec", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC], file_type=parquet", - " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", - ]; - "#); + Input / Optimized Plan: + SortPreservingMergeExec: [nullable_col@0 ASC] + UnionExec + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC], file_type=parquet + SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet + "); Ok(()) } @@ -446,23 +438,15 @@ async fn test_union_inputs_different_sorted() -> Result<()> { .with_repartition_sorts(true) .with_expected_description("// should not add a sort at the output of the union, input plan should not be changed") .with_expect_no_change(true); - assert_snapshot!(test.run(), @r#" - let expected_input = [ - "SortPreservingMergeExec: [nullable_col@0 ASC]", - " 
UnionExec", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC, non_nullable_col@1 ASC], file_type=parquet", - " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", - ]; + assert_snapshot!(test.run(), @r" // should not add a sort at the output of the union, input plan should not be changed - let expected_optimized = [ - "SortPreservingMergeExec: [nullable_col@0 ASC]", - " UnionExec", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC, non_nullable_col@1 ASC], file_type=parquet", - " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", - ]; - "#); + Input / Optimized Plan: + SortPreservingMergeExec: [nullable_col@0 ASC] + UnionExec + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC, non_nullable_col@1 ASC], file_type=parquet + SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet + "); Ok(()) } @@ -486,23 +470,22 @@ async fn test_union_inputs_different_sorted2() -> Result<()> { // First DataSourceExec has output ordering(nullable_col@0 ASC). However, it doesn't satisfy the // required ordering of SortPreservingMergeExec. let test = EnforceSortingTest::new(physical_plan).with_repartition_sorts(true); - assert_snapshot!(test.run(), @r#" - let expected_input = [ - "SortPreservingMergeExec: [nullable_col@0 ASC, non_nullable_col@1 ASC]", - " UnionExec", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC], file_type=parquet", - " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", - ]; - let expected_optimized = [ - "SortPreservingMergeExec: [nullable_col@0 ASC, non_nullable_col@1 ASC]", - " UnionExec", - " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC], file_type=parquet", - " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", - ]; - "#); + assert_snapshot!(test.run(), @r" + Input Plan: + SortPreservingMergeExec: [nullable_col@0 ASC, non_nullable_col@1 ASC] + UnionExec + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC], file_type=parquet + SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet + + Optimized Plan: + SortPreservingMergeExec: [nullable_col@0 ASC, non_nullable_col@1 ASC] + UnionExec + SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false] + 
DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC], file_type=parquet + SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet + "); Ok(()) } @@ -532,27 +515,26 @@ async fn test_union_inputs_different_sorted3() -> Result<()> { "// should adjust sorting in the first input of the union such that it is not unnecessarily fine" ); - assert_snapshot!(test.run(), @r#" - let expected_input = [ - "SortPreservingMergeExec: [nullable_col@0 ASC]", - " UnionExec", - " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC], file_type=parquet", - " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", - ]; + assert_snapshot!(test.run(), @r" + Input Plan: + SortPreservingMergeExec: [nullable_col@0 ASC] + UnionExec + SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC], file_type=parquet + SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet + // should adjust sorting in the first input of the union such that it is not unnecessarily fine - let expected_optimized = [ - "SortPreservingMergeExec: [nullable_col@0 ASC]", - " UnionExec", - " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC], file_type=parquet", - " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", - ]; - "#); + Optimized Plan: + SortPreservingMergeExec: [nullable_col@0 ASC] + UnionExec + SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC], file_type=parquet + SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet + "); Ok(()) } @@ -577,27 +559,26 @@ async fn test_union_inputs_different_sorted4() -> Result<()> { // `UnionExec` satisfy the ordering, OR add a single sort after // the `UnionExec` (both of which are equally good for this example). 
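 // As the snapshot below shows, the rule takes the first route here: each
 // union input gets its own SortExec on [nullable_col@0 ASC,
 // non_nullable_col@1 ASC], so the SortPreservingMergeExec requirement is
 // satisfied without adding a global sort above the UnionExec.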
let test = EnforceSortingTest::new(physical_plan).with_repartition_sorts(true); - assert_snapshot!(test.run(), @r#" - let expected_input = [ - "SortPreservingMergeExec: [nullable_col@0 ASC, non_nullable_col@1 ASC]", - " UnionExec", - " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC], file_type=parquet", - " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", - ]; - let expected_optimized = [ - "SortPreservingMergeExec: [nullable_col@0 ASC, non_nullable_col@1 ASC]", - " UnionExec", - " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", - " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC], file_type=parquet", - " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", - ]; - "#); + assert_snapshot!(test.run(), @r" + Input Plan: + SortPreservingMergeExec: [nullable_col@0 ASC, non_nullable_col@1 ASC] + UnionExec + SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC], file_type=parquet + SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet + + Optimized Plan: + SortPreservingMergeExec: [nullable_col@0 ASC, non_nullable_col@1 ASC] + UnionExec + SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet + SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC], file_type=parquet + SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet + "); Ok(()) } @@ -633,24 +614,23 @@ async fn test_union_inputs_different_sorted5() -> Result<()> { // example below. However, we should be able to change the unnecessarily // fine `SortExec`s below with required `SortExec`s that are absolutely necessary. 
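 // Concretely: the merge above only needs nullable_col@0 ASC, so (as the
 // snapshot below shows) each two-column SortExec under the union is relaxed
 // to that single-column prefix.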
let test = EnforceSortingTest::new(physical_plan).with_repartition_sorts(true); - assert_snapshot!(test.run(), @r#" - let expected_input = [ - "SortPreservingMergeExec: [nullable_col@0 ASC]", - " UnionExec", - " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", - " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 DESC NULLS LAST], preserve_partitioning=[false]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", - ]; - let expected_optimized = [ - "SortPreservingMergeExec: [nullable_col@0 ASC]", - " UnionExec", - " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", - " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", - ]; - "#); + assert_snapshot!(test.run(), @r" + Input Plan: + SortPreservingMergeExec: [nullable_col@0 ASC] + UnionExec + SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet + SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 DESC NULLS LAST], preserve_partitioning=[false] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet + + Optimized Plan: + SortPreservingMergeExec: [nullable_col@0 ASC] + UnionExec + SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet + SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet + "); Ok(()) } @@ -685,30 +665,29 @@ async fn test_union_inputs_different_sorted6() -> Result<()> { "// Should adjust the requirement in the third input of the union so\n\ // that it is not unnecessarily fine.", ); - assert_snapshot!(test.run(), @r#" - let expected_input = [ - "SortPreservingMergeExec: [nullable_col@0 ASC]", - " UnionExec", - " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC], file_type=parquet", - " SortPreservingMergeExec: [nullable_col@0 ASC, non_nullable_col@1 ASC]", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", - ]; + assert_snapshot!(test.run(), @r" + Input Plan: + SortPreservingMergeExec: [nullable_col@0 ASC] + UnionExec + SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC], file_type=parquet + SortPreservingMergeExec: [nullable_col@0 ASC, 
non_nullable_col@1 ASC] + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet + // Should adjust the requirement in the third input of the union so // that it is not unnecessarily fine. - let expected_optimized = [ - "SortPreservingMergeExec: [nullable_col@0 ASC]", - " UnionExec", - " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC], file_type=parquet", - " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[true]", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", - ]; - "#); + Optimized Plan: + SortPreservingMergeExec: [nullable_col@0 ASC] + UnionExec + SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC], file_type=parquet + SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[true] + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet + "); Ok(()) } @@ -731,25 +710,24 @@ async fn test_union_inputs_different_sorted7() -> Result<()> { // Union has unnecessarily fine ordering below it. We should be able to replace them with absolutely necessary ordering. 
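 // As the snapshot below shows, the SortExecs under the union are kept (the
 // union preserves their ordering for the merge above), but their sort keys
 // shrink to the nullable_col@0 ASC prefix that the merge actually needs.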
let test = EnforceSortingTest::new(physical_plan).with_repartition_sorts(true) .with_expected_description("// Union preserves the inputs ordering and we should not change any of the SortExecs under UnionExec"); - assert_snapshot!(test.run(), @r#" - let expected_input = [ - "SortPreservingMergeExec: [nullable_col@0 ASC]", - " UnionExec", - " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", - " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", - ]; + assert_snapshot!(test.run(), @r" + Input Plan: + SortPreservingMergeExec: [nullable_col@0 ASC] + UnionExec + SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet + SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet + // Union preserves the inputs ordering and we should not change any of the SortExecs under UnionExec - let expected_optimized = [ - "SortPreservingMergeExec: [nullable_col@0 ASC]", - " UnionExec", - " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", - " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", - ]; - "#); + Optimized Plan: + SortPreservingMergeExec: [nullable_col@0 ASC] + UnionExec + SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet + SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet + "); Ok(()) } @@ -794,22 +772,21 @@ async fn test_union_inputs_different_sorted8() -> Result<()> { "// Since `UnionExec` doesn't preserve ordering in the plan above.\n\ // We shouldn't keep SortExecs in the plan.", ); - assert_snapshot!(test.run(), @r#" - let expected_input = [ - "UnionExec", - " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", - " SortExec: expr=[nullable_col@0 DESC NULLS LAST, non_nullable_col@1 DESC NULLS LAST], preserve_partitioning=[false]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", - ]; + assert_snapshot!(test.run(), @r" + Input Plan: + UnionExec + SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet + SortExec: expr=[nullable_col@0 DESC NULLS LAST, non_nullable_col@1 DESC NULLS LAST], preserve_partitioning=[false] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet + 
// Since `UnionExec` doesn't preserve ordering in the plan above,
    // we shouldn't keep SortExecs in the plan.
-    let expected_optimized = [
-        "UnionExec",
-        "  DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet",
-        "  DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet",
-    ];
-    "#);
+    Optimized Plan:
+    UnionExec
+      DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet
+      DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet
+    ");

    Ok(())
}

@@ -834,16 +811,15 @@ async fn test_soft_hard_requirements_remove_soft_requirement() -> Result<()> {
    let test = EnforceSortingTest::new(physical_plan).with_repartition_sorts(true);
    assert_snapshot!(test.run(), @r#"
-    let expected_input = [
-        "BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]",
-        "  SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]",
-        "    DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet",
-    ];
-    let expected_optimized = [
-        "BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]",
-        "  SortExec: expr=[nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]",
-        "    DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet",
-    ];
+    Input Plan:
+    BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]
+      SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]
+        DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet
+
+    Optimized Plan:
+    BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]
+      SortExec: expr=[nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]
+        DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet
    "#);
    // TODO: When sort pushdown respects the alternatives and removes soft SortExecs, this should be changed
    // let expected_optimized = [
@@ -883,18 +859,17 @@ async fn test_soft_hard_requirements_remove_soft_requirement_without_pushdowns(
    let test = EnforceSortingTest::new(physical_plan).with_repartition_sorts(true);
    assert_snapshot!(test.run(), @r#"
-    let expected_input = [
-        "ProjectionExec: expr=[nullable_col@0 + non_nullable_col@1 as count]",
-        "  BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]",
-        "    SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]",
-        "      DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet",
-    ];
-    let expected_optimized = [
-        "ProjectionExec: expr=[nullable_col@0 + non_nullable_col@1 as count]",
-        "  BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]",
-        "    SortExec: expr=[nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]",
-        "      DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet",
-    ];
+    Input Plan:
+    ProjectionExec: expr=[nullable_col@0 + non_nullable_col@1 as count]
+      BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]
+        SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]
+          DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet
+
+    Optimized Plan:
+    ProjectionExec: expr=[nullable_col@0 + non_nullable_col@1 as count]
+      BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]
+        SortExec: expr=[nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]
+          DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet
    "#);
    // TODO: When sort pushdown respects the alternatives and removes soft SortExecs, this should be changed
    // let expected_optimized = [
@@ -932,19 +907,18 @@ async fn test_soft_hard_requirements_remove_soft_requirement_without_pushdowns(
    let test = EnforceSortingTest::new(physical_plan).with_repartition_sorts(true);
    assert_snapshot!(test.run(), @r#"
-    let expected_input = [
-        "BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]",
-        "  ProjectionExec: expr=[nullable_col@0 + non_nullable_col@1 as nullable_col]",
-        "    SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]",
-        "      DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet",
-    ];
-    let expected_optimized = [
-        "BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]",
-        "  SortExec: expr=[nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]",
-        "    ProjectionExec: expr=[nullable_col@0 + non_nullable_col@1 as nullable_col]",
-        "      SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]",
-        "        DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet",
-    ];
+    Input Plan:
+    BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]
+      ProjectionExec: expr=[nullable_col@0 + non_nullable_col@1 as nullable_col]
+        SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]
+          DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet
+
+    Optimized Plan:
+    BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]
+      SortExec: expr=[nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]
+        ProjectionExec: expr=[nullable_col@0 + non_nullable_col@1 as nullable_col]
+          SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]
+            DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet
    "#);
    // TODO: When sort pushdown respects the alternatives and removes soft SortExecs, this should be changed
    // let expected_optimized = [
@@ -994,21 +968,20 @@ async fn test_soft_hard_requirements_multiple_soft_requirements() -> Result<()>
    let test = EnforceSortingTest::new(physical_plan).with_repartition_sorts(true);
    assert_snapshot!(test.run(), @r#"
-    let expected_input = [
-        "BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]",
-        "  BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]",
-        "    ProjectionExec: expr=[nullable_col@0 + non_nullable_col@1 as nullable_col]",
-        "      SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]",
-        "        DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet",
-    ];
-    let expected_optimized = [
-        "BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]",
-        "  BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]",
-        "    SortExec: expr=[nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]",
-        "      ProjectionExec: expr=[nullable_col@0 + non_nullable_col@1 as nullable_col]",
-        "        SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]",
-        "          DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet",
-    ];
+    Input Plan:
+    BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]
+      BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]
+        ProjectionExec: expr=[nullable_col@0 + non_nullable_col@1 as nullable_col]
+          SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]
+            DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet
+
+    Optimized Plan:
+    BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]
+      BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]
+        SortExec: expr=[nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]
+          ProjectionExec: expr=[nullable_col@0 + non_nullable_col@1 as nullable_col]
+            SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]
+              DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet
    "#);
    // TODO: When sort pushdown respects the alternatives and removes soft SortExecs, this should be changed
    // let expected_optimized = [
@@ -1060,23 +1033,22 @@ async fn test_soft_hard_requirements_multiple_soft_requirements() -> Result<()>
        bounded_window_exec_with_partition("count", vec![], partition_bys, sort3);
    let test = EnforceSortingTest::new(physical_plan).with_repartition_sorts(true);
    assert_snapshot!(test.run(), @r#"
-    let expected_input = [
-        "BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]",
-        "  SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]",
-        "    SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]",
-        "      BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]",
-        "        ProjectionExec: expr=[nullable_col@0 + non_nullable_col@1 as nullable_col]",
-        "          SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]",
-        "            DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet",
-    ];
-    let expected_optimized = [
-        "BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]",
-        "  BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]",
-        "    SortExec: expr=[nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]",
-        "      ProjectionExec: expr=[nullable_col@0 + non_nullable_col@1 as nullable_col]",
-        "        SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]",
-        "          DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet",
-    ];
+    Input Plan:
+    BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]
+      SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]
+        SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]
+          BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]
+            ProjectionExec: expr=[nullable_col@0 + non_nullable_col@1 as nullable_col]
+              SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]
+                DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet
+
+    Optimized Plan:
+    BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]
+      BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]
+        SortExec: expr=[nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]
+          ProjectionExec: expr=[nullable_col@0 + non_nullable_col@1 as nullable_col]
+            SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]
+              DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet
    "#);
    // TODO: When sort pushdown respects the alternatives and removes soft SortExecs, this should be changed
    // let expected_optimized = [
@@ -1131,22 +1103,21 @@ async fn test_soft_hard_requirements_multiple_sorts() -> Result<()> {
    let physical_plan = sort_exec(ordering2, sort2);
    let test = EnforceSortingTest::new(physical_plan).with_repartition_sorts(true);
    assert_snapshot!(test.run(), @r#"
-    let expected_input = [
-        "SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]",
-        "  SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]",
-        "    BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]",
-        "      ProjectionExec: expr=[nullable_col@0 + non_nullable_col@1 as nullable_col]",
-        "        SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]",
-        "          DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet",
-    ];
-    let expected_optimized = [
-        "SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]",
-        "  BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]",
-        "    SortExec: expr=[nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]",
-        "      ProjectionExec: expr=[nullable_col@0 + non_nullable_col@1 as nullable_col]",
-        "        SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]",
-        "          DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet",
-    ];
+    Input Plan:
+    SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]
+      SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]
+        BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]
+          ProjectionExec: expr=[nullable_col@0 + non_nullable_col@1 as nullable_col]
+            SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]
+              DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet
+
+    Optimized Plan:
+    SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]
+      BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]
+        SortExec: expr=[nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]
+          ProjectionExec: expr=[nullable_col@0 + non_nullable_col@1 as nullable_col]
+            SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]
+              DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet
    "#);
    // TODO: When sort pushdown respects the alternatives and removes soft SortExecs, this should be changed
    // let expected_optimized = [
@@ -1195,21 +1166,20 @@ async fn test_soft_hard_requirements_with_multiple_soft_requirements_and_output_
    ));
    let test = EnforceSortingTest::new(physical_plan).with_repartition_sorts(true);
    assert_snapshot!(test.run(), @r#"
-    let expected_input = [
-        "OutputRequirementExec: order_by=[(non_nullable_col@1, asc)], dist_by=SinglePartition",
-        "  BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]",
-        "    BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]",
-        "      SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]",
-        "        DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet",
-    ];
-    let expected_optimized = [
-        "OutputRequirementExec: order_by=[(non_nullable_col@1, asc)], dist_by=SinglePartition",
-        "  BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]",
-        "    SortExec: expr=[non_nullable_col@1 ASC NULLS LAST], preserve_partitioning=[false]",
-        "      BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]",
-        "        SortExec: expr=[nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]",
-        "          DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet",
-    ];
+    Input Plan:
+    OutputRequirementExec: order_by=[(non_nullable_col@1, asc)], dist_by=SinglePartition
+      BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]
+        BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]
+          SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]
+            DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet
+
+    Optimized Plan:
+    OutputRequirementExec: order_by=[(non_nullable_col@1, asc)], dist_by=SinglePartition
+      BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]
+        SortExec: expr=[non_nullable_col@1 ASC NULLS LAST], preserve_partitioning=[false]
+          BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]
+            SortExec: expr=[nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]
+              DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet
    "#);
    // TODO: When sort pushdown respects the alternatives and removes soft SortExecs, this should be changed
    // let expected_optimized = [
@@ -1255,22 +1225,21 @@ async fn test_window_multi_path_sort() -> Result<()> {
    // are not necessarily the same to be able to remove them.
    let test = EnforceSortingTest::new(physical_plan).with_repartition_sorts(true);
    assert_snapshot!(test.run(), @r#"
-    let expected_input = [
-        "BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]",
-        "  SortPreservingMergeExec: [nullable_col@0 DESC NULLS LAST]",
-        "    UnionExec",
-        "      SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]",
-        "        DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC, non_nullable_col@1 ASC], file_type=parquet",
-        "      SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]",
-        "        DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC], file_type=parquet",
-    ];
-    let expected_optimized = [
-        "WindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }]",
-        "  SortPreservingMergeExec: [nullable_col@0 ASC]",
-        "    UnionExec",
-        "      DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC, non_nullable_col@1 ASC], file_type=parquet",
-        "      DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC], file_type=parquet",
-    ];
+    Input Plan:
+    BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]
+      SortPreservingMergeExec: [nullable_col@0 DESC NULLS LAST]
+        UnionExec
+          SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]
+            DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC, non_nullable_col@1 ASC], file_type=parquet
+          SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]
+            DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC], file_type=parquet
+
+    Optimized Plan:
+    WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }]
+      SortPreservingMergeExec: [nullable_col@0 ASC]
+        UnionExec
+          DataSourceExec: file_groups={1 group: 
[[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC, non_nullable_col@1 ASC], file_type=parquet + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC], file_type=parquet "#); Ok(()) @@ -1297,22 +1266,21 @@ async fn test_window_multi_path_sort2() -> Result<()> { // The unnecessary SortExecs should be removed let test = EnforceSortingTest::new(physical_plan).with_repartition_sorts(true); assert_snapshot!(test.run(), @r#" - let expected_input = [ - "BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", - " SortPreservingMergeExec: [nullable_col@0 ASC, non_nullable_col@1 ASC]", - " UnionExec", - " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC], file_type=parquet", - " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC], file_type=parquet", - ]; - let expected_optimized = [ - "BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", - " SortPreservingMergeExec: [nullable_col@0 ASC]", - " UnionExec", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC], file_type=parquet", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC], file_type=parquet", - ]; + Input Plan: + BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + SortPreservingMergeExec: [nullable_col@0 ASC, non_nullable_col@1 ASC] + UnionExec + SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC], file_type=parquet + SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC], file_type=parquet + + Optimized Plan: + BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + SortPreservingMergeExec: [nullable_col@0 ASC] + UnionExec + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC], file_type=parquet + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC], file_type=parquet "#); Ok(()) @@ -1349,28 +1317,27 @@ async fn test_union_inputs_different_sorted_with_limit() -> Result<()> { let test = 
EnforceSortingTest::new(physical_plan).with_repartition_sorts(true);
    // Should not change the unnecessarily fine `SortExec`s because there is a `LimitExec`
-    assert_snapshot!(test.run(), @r#"
-    let expected_input = [
-        "SortPreservingMergeExec: [nullable_col@0 ASC]",
-        "  UnionExec",
-        "    SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]",
-        "      DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet",
-        "    GlobalLimitExec: skip=0, fetch=100",
-        "      LocalLimitExec: fetch=100",
-        "        SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 DESC NULLS LAST], preserve_partitioning=[false]",
-        "          DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet",
-    ];
-    let expected_optimized = [
-        "SortPreservingMergeExec: [nullable_col@0 ASC]",
-        "  UnionExec",
-        "    SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]",
-        "      DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet",
-        "    GlobalLimitExec: skip=0, fetch=100",
-        "      LocalLimitExec: fetch=100",
-        "        SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 DESC NULLS LAST], preserve_partitioning=[false]",
-        "          DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet",
-    ];
-    "#);
+    assert_snapshot!(test.run(), @r"
+    Input Plan:
+    SortPreservingMergeExec: [nullable_col@0 ASC]
+      UnionExec
+        SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]
+          DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet
+        GlobalLimitExec: skip=0, fetch=100
+          LocalLimitExec: fetch=100
+            SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 DESC NULLS LAST], preserve_partitioning=[false]
+              DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet

+    Optimized Plan:
+    SortPreservingMergeExec: [nullable_col@0 ASC]
+      UnionExec
+        SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]
+          DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet
+        GlobalLimitExec: skip=0, fetch=100
+          LocalLimitExec: fetch=100
+            SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 DESC NULLS LAST], preserve_partitioning=[false]
+              DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet
+    ");

    Ok(())
}

@@ -1550,23 +1517,22 @@ async fn test_sort_merge_join_complex_order_by() -> Result<()> {
        .with_expected_description(
            "// cannot push down the sort requirements, need to add SortExec",
        );
-    assert_snapshot!(test.run(), @r#"
-    let expected_input = [
-        "SortPreservingMergeExec: [col_b@3 ASC, col_a@2 ASC]",
-        "  SortMergeJoin: join_type=Inner, on=[(nullable_col@0, col_a@0)]",
-        "    DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet",
-        "    DataSourceExec: file_groups={1 group: [[x]]}, projection=[col_a, col_b], file_type=parquet",
-    ];
+    assert_snapshot!(test.run(), @r"
+    Input Plan:
+    SortPreservingMergeExec: [col_b@3 ASC, col_a@2 ASC]
+      SortMergeJoin: join_type=Inner, on=[(nullable_col@0, col_a@0)]
+        DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet
+        DataSourceExec: file_groups={1 group: [[x]]}, projection=[col_a, col_b], file_type=parquet
+
+    // cannot push down the sort requirements, need to add SortExec
-    let expected_optimized = [
-        "SortExec: expr=[col_b@3 ASC, nullable_col@0 ASC], preserve_partitioning=[false]",
-        "  SortMergeJoin: join_type=Inner, on=[(nullable_col@0, col_a@0)]",
-        "    SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]",
-        "      DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet",
-        "    SortExec: expr=[col_a@0 ASC], preserve_partitioning=[false]",
-        "      DataSourceExec: file_groups={1 group: [[x]]}, projection=[col_a, col_b], file_type=parquet",
-    ];
-    "#);
+    Optimized Plan:
+    SortExec: expr=[col_b@3 ASC, nullable_col@0 ASC], preserve_partitioning=[false]
+      SortMergeJoin: join_type=Inner, on=[(nullable_col@0, col_a@0)]
+        SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]
+          DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet
+        SortExec: expr=[col_a@0 ASC], preserve_partitioning=[false]
+          DataSourceExec: file_groups={1 group: [[x]]}, projection=[col_a, col_b], file_type=parquet
+    ");

    // order by (nullable_col, col_b, col_a)
    let ordering2 = [
@@ -1581,22 +1547,21 @@ async fn test_sort_merge_join_complex_order_by() -> Result<()> {
        .with_expected_description(
            "// Can push down the sort requirements since col_a = nullable_col",
        );
-    assert_snapshot!(test.run(), @r#"
-    let expected_input = [
-        "SortPreservingMergeExec: [nullable_col@0 ASC, col_b@3 ASC, col_a@2 ASC]",
-        "  SortMergeJoin: join_type=Inner, on=[(nullable_col@0, col_a@0)]",
-        "    DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet",
-        "    DataSourceExec: file_groups={1 group: [[x]]}, projection=[col_a, col_b], file_type=parquet",
-    ];
+    assert_snapshot!(test.run(), @r"
+    Input Plan:
+    SortPreservingMergeExec: [nullable_col@0 ASC, col_b@3 ASC, col_a@2 ASC]
+      SortMergeJoin: join_type=Inner, on=[(nullable_col@0, col_a@0)]
+        DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet
+        DataSourceExec: file_groups={1 group: [[x]]}, projection=[col_a, col_b], file_type=parquet
+
+    // Can push down the sort requirements since col_a = nullable_col
-    let expected_optimized = [
-        "SortMergeJoin: join_type=Inner, on=[(nullable_col@0, col_a@0)]",
-        "  SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]",
-        "    DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet",
-        "  SortExec: expr=[col_a@0 ASC, col_b@1 ASC], preserve_partitioning=[false]",
-        "    DataSourceExec: file_groups={1 group: [[x]]}, projection=[col_a, col_b], file_type=parquet",
-    ];
-    "#);
+    Optimized Plan:
+    SortMergeJoin: join_type=Inner, on=[(nullable_col@0, col_a@0)]
+      SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]
+        DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet
+      SortExec: expr=[col_a@0 ASC, col_b@1 ASC], preserve_partitioning=[false]
+        DataSourceExec: file_groups={1 group: [[x]]}, projection=[col_a, col_b], file_type=parquet
+    ");

    Ok(())
}

@@ -1619,22 +1584,21 @@ async fn test_multilayer_coalesce_partitions() -> Result<()> {
    // we should be able to parallelize the sort as well (given that the executors
    // in between don't require a single partition).
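    // Editor's sketch (illustrative, not part of this patch): with
    // `repartition_sorts` enabled, the rewrite asserted by the snapshot below
    // replaces a single-partition sort above a `CoalescePartitionsExec` with
    // per-partition sorts whose outputs are merged at the end, roughly:
    //
    //   SortExec: preserve_partitioning=[false]      SortPreservingMergeExec
    //     CoalescePartitionsExec               ==>     SortExec: preserve_partitioning=[true]
    //       ...                                          ...
    //
    // A hypothetical way to build such an input plan; `sort_exec`, `sort_expr`,
    // and `repartition_exec` are helpers used in this file, while
    // `coalesce_partitions_exec` and `memory_exec` are assumed names:
    //
    //   let source = repartition_exec(memory_exec(&schema));
    //   let plan = sort_exec(
    //       [sort_expr("nullable_col", &schema)].into(),
    //       coalesce_partitions_exec(source),
    //   );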
let test = EnforceSortingTest::new(physical_plan).with_repartition_sorts(true); - assert_snapshot!(test.run(), @r#" - let expected_input = [ - "SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", - " FilterExec: NOT non_nullable_col@1", - " CoalescePartitionsExec", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", - ]; - let expected_optimized = [ - "SortPreservingMergeExec: [nullable_col@0 ASC]", - " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[true]", - " FilterExec: NOT non_nullable_col@1", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", - ]; - "#); + assert_snapshot!(test.run(), @r" + Input Plan: + SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false] + FilterExec: NOT non_nullable_col@1 + CoalescePartitionsExec + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet + + Optimized Plan: + SortPreservingMergeExec: [nullable_col@0 ASC] + SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[true] + FilterExec: NOT non_nullable_col@1 + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet + "); Ok(()) } @@ -1653,22 +1617,21 @@ async fn test_with_lost_ordering_bounded() -> Result<()> { let physical_plan = sort_exec([sort_expr("a", &schema)].into(), coalesce_partitions); let test = EnforceSortingTest::new(physical_plan).with_repartition_sorts(true); - assert_snapshot!(test.run(), @r#" - let expected_input = [ - "SortExec: expr=[a@0 ASC], preserve_partitioning=[false]", - " CoalescePartitionsExec", - " RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], file_type=csv, has_header=false", - ]; - let expected_optimized = [ - "SortPreservingMergeExec: [a@0 ASC]", - " SortExec: expr=[a@0 ASC], preserve_partitioning=[true]", - " RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], file_type=csv, has_header=false", - ]; - "#); + assert_snapshot!(test.run(), @r" + Input Plan: + SortExec: expr=[a@0 ASC], preserve_partitioning=[false] + CoalescePartitionsExec + RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10 + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], file_type=csv, has_header=false + + Optimized Plan: + SortPreservingMergeExec: [a@0 ASC] + SortExec: expr=[a@0 ASC], preserve_partitioning=[true] + RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10 + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], file_type=csv, has_header=false + "); Ok(()) } @@ 
-1773,20 +1736,19 @@ async fn test_do_not_pushdown_through_spm() -> Result<()> { let test = EnforceSortingTest::new(physical_plan.clone()).with_repartition_sorts(true); - assert_snapshot!(test.run(), @r#" - let expected_input = [ - "SortExec: expr=[b@1 ASC], preserve_partitioning=[false]", - " SortPreservingMergeExec: [a@0 ASC, b@1 ASC]", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC, b@1 ASC], file_type=csv, has_header=false", - ]; - let expected_optimized = [ - "SortExec: expr=[b@1 ASC], preserve_partitioning=[false]", - " SortPreservingMergeExec: [a@0 ASC, b@1 ASC]", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC, b@1 ASC], file_type=csv, has_header=false", - ]; - "#); + assert_snapshot!(test.run(), @r" + Input Plan: + SortExec: expr=[b@1 ASC], preserve_partitioning=[false] + SortPreservingMergeExec: [a@0 ASC, b@1 ASC] + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC, b@1 ASC], file_type=csv, has_header=false + + Optimized Plan: + SortExec: expr=[b@1 ASC], preserve_partitioning=[false] + SortPreservingMergeExec: [a@0 ASC, b@1 ASC] + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC, b@1 ASC], file_type=csv, has_header=false + "); Ok(()) } @@ -1809,20 +1771,19 @@ async fn test_pushdown_through_spm() -> Result<()> { ); let test = EnforceSortingTest::new(physical_plan.clone()).with_repartition_sorts(true); - assert_snapshot!(test.run(), @r#" - let expected_input = [ - "SortExec: expr=[a@0 ASC, b@1 ASC, c@2 ASC], preserve_partitioning=[false]", - " SortPreservingMergeExec: [a@0 ASC, b@1 ASC]", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC, b@1 ASC], file_type=csv, has_header=false", - ]; - let expected_optimized = [ - "SortPreservingMergeExec: [a@0 ASC, b@1 ASC]", - " SortExec: expr=[a@0 ASC, b@1 ASC, c@2 ASC], preserve_partitioning=[true]", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC, b@1 ASC], file_type=csv, has_header=false", - ]; - "#); + assert_snapshot!(test.run(), @r" + Input Plan: + SortExec: expr=[a@0 ASC, b@1 ASC, c@2 ASC], preserve_partitioning=[false] + SortPreservingMergeExec: [a@0 ASC, b@1 ASC] + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC, b@1 ASC], file_type=csv, has_header=false + + Optimized Plan: + SortPreservingMergeExec: [a@0 ASC, b@1 ASC] + SortExec: expr=[a@0 ASC, b@1 ASC, c@2 ASC], preserve_partitioning=[true] + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC, b@1 ASC], file_type=csv, has_header=false + "); Ok(()) } @@ -1840,22 +1801,21 @@ async fn test_window_multi_layer_requirement() -> Result<()> { let test = 
EnforceSortingTest::new(physical_plan.clone()).with_repartition_sorts(true); assert_snapshot!(test.run(), @r#" - let expected_input = [ - "BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", - " SortPreservingMergeExec: [a@0 ASC, b@1 ASC]", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=10, preserve_order=true, sort_exprs=a@0 ASC, b@1 ASC", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " SortExec: expr=[a@0 ASC, b@1 ASC], preserve_partitioning=[false]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false", - ]; - let expected_optimized = [ - "BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", - " SortPreservingMergeExec: [a@0 ASC, b@1 ASC]", - " SortExec: expr=[a@0 ASC, b@1 ASC], preserve_partitioning=[true]", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=10", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false", - ]; + Input Plan: + BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + SortPreservingMergeExec: [a@0 ASC, b@1 ASC] + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=10, preserve_order=true, sort_exprs=a@0 ASC, b@1 ASC + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + SortExec: expr=[a@0 ASC, b@1 ASC], preserve_partitioning=[false] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false + + Optimized Plan: + BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + SortPreservingMergeExec: [a@0 ASC, b@1 ASC] + SortExec: expr=[a@0 ASC, b@1 ASC], preserve_partitioning=[true] + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=10 + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false "#); Ok(()) @@ -1877,16 +1837,11 @@ async fn test_not_replaced_with_partial_sort_for_bounded_input() -> Result<()> { let test = EnforceSortingTest::new(physical_plan.clone()) .with_repartition_sorts(false) .with_expect_no_change(true); - assert_snapshot!(test.run(), @r#" - let expected_input = [ - "SortExec: expr=[a@0 ASC, b@1 ASC, c@2 ASC], preserve_partitioning=[false]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[b@1 ASC, c@2 ASC], file_type=parquet", - ]; - let expected_optimized = [ - "SortExec: expr=[a@0 ASC, b@1 ASC, c@2 ASC], preserve_partitioning=[false]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[b@1 ASC, c@2 ASC], file_type=parquet", - ]; - "#); + assert_snapshot!(test.run(), @r" + Input / Optimized Plan: + SortExec: expr=[a@0 ASC, 
b@1 ASC, c@2 ASC], preserve_partitioning=[false] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[b@1 ASC, c@2 ASC], file_type=parquet + "); Ok(()) } @@ -1988,17 +1943,16 @@ async fn test_remove_unnecessary_sort() -> Result<()> { let test = EnforceSortingTest::new(physical_plan.clone()).with_repartition_sorts(true); - assert_snapshot!(test.run(), @r#" - let expected_input = [ - "SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", - " SortExec: expr=[non_nullable_col@1 ASC], preserve_partitioning=[false]", - " DataSourceExec: partitions=1, partition_sizes=[0]", - ]; - let expected_optimized = [ - "SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", - " DataSourceExec: partitions=1, partition_sizes=[0]", - ]; - "#); + assert_snapshot!(test.run(), @r" + Input Plan: + SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false] + SortExec: expr=[non_nullable_col@1 ASC], preserve_partitioning=[false] + DataSourceExec: partitions=1, partition_sizes=[0] + + Optimized Plan: + SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false] + DataSourceExec: partitions=1, partition_sizes=[0] + "); Ok(()) } @@ -2040,23 +1994,22 @@ async fn test_remove_unnecessary_sort_window_multilayer() -> Result<()> { let test = EnforceSortingTest::new(physical_plan.clone()).with_repartition_sorts(true); assert_snapshot!(test.run(), @r#" - let expected_input = [ - "BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", - " FilterExec: NOT non_nullable_col@1", - " SortExec: expr=[non_nullable_col@1 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", - " CoalesceBatchesExec: target_batch_size=128", - " SortExec: expr=[non_nullable_col@1 DESC], preserve_partitioning=[false]", - " DataSourceExec: partitions=1, partition_sizes=[0]", - ]; - let expected_optimized = [ - "WindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }]", - " FilterExec: NOT non_nullable_col@1", - " BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", - " CoalesceBatchesExec: target_batch_size=128", - " SortExec: expr=[non_nullable_col@1 DESC], preserve_partitioning=[false]", - " DataSourceExec: partitions=1, partition_sizes=[0]", - ]; + Input Plan: + BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + FilterExec: NOT non_nullable_col@1 + SortExec: expr=[non_nullable_col@1 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + CoalesceBatchesExec: 
target_batch_size=128 + SortExec: expr=[non_nullable_col@1 DESC], preserve_partitioning=[false] + DataSourceExec: partitions=1, partition_sizes=[0] + + Optimized Plan: + WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + FilterExec: NOT non_nullable_col@1 + BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + CoalesceBatchesExec: target_batch_size=128 + SortExec: expr=[non_nullable_col@1 DESC], preserve_partitioning=[false] + DataSourceExec: partitions=1, partition_sizes=[0] "#); Ok(()) @@ -2071,16 +2024,15 @@ async fn test_add_required_sort() -> Result<()> { let test = EnforceSortingTest::new(physical_plan.clone()).with_repartition_sorts(true); - assert_snapshot!(test.run(), @r#" - let expected_input = [ - "SortPreservingMergeExec: [nullable_col@0 ASC]", - " DataSourceExec: partitions=1, partition_sizes=[0]", - ]; - let expected_optimized = [ - "SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", - " DataSourceExec: partitions=1, partition_sizes=[0]", - ]; - "#); + assert_snapshot!(test.run(), @r" + Input Plan: + SortPreservingMergeExec: [nullable_col@0 ASC] + DataSourceExec: partitions=1, partition_sizes=[0] + + Optimized Plan: + SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false] + DataSourceExec: partitions=1, partition_sizes=[0] + "); Ok(()) } @@ -2097,19 +2049,18 @@ async fn test_remove_unnecessary_sort1() -> Result<()> { let test = EnforceSortingTest::new(physical_plan.clone()).with_repartition_sorts(true); - assert_snapshot!(test.run(), @r#" - let expected_input = [ - "SortPreservingMergeExec: [nullable_col@0 ASC]", - " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", - " SortPreservingMergeExec: [nullable_col@0 ASC]", - " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", - " DataSourceExec: partitions=1, partition_sizes=[0]", - ]; - let expected_optimized = [ - "SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", - " DataSourceExec: partitions=1, partition_sizes=[0]", - ]; - "#); + assert_snapshot!(test.run(), @r" + Input Plan: + SortPreservingMergeExec: [nullable_col@0 ASC] + SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false] + SortPreservingMergeExec: [nullable_col@0 ASC] + SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false] + DataSourceExec: partitions=1, partition_sizes=[0] + + Optimized Plan: + SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false] + DataSourceExec: partitions=1, partition_sizes=[0] + "); Ok(()) } @@ -2134,23 +2085,22 @@ async fn test_remove_unnecessary_sort2() -> Result<()> { let test = EnforceSortingTest::new(physical_plan.clone()).with_repartition_sorts(true); - assert_snapshot!(test.run(), @r#" - let expected_input = [ - "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=10", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", - " SortPreservingMergeExec: [nullable_col@0 ASC, non_nullable_col@1 ASC]", - " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]", - " SortPreservingMergeExec: [non_nullable_col@1 
ASC]", - " SortExec: expr=[non_nullable_col@1 ASC], preserve_partitioning=[false]", - " DataSourceExec: partitions=1, partition_sizes=[0]", - ]; - let expected_optimized = [ - "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=10", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: partitions=1, partition_sizes=[0]", - ]; - "#); + assert_snapshot!(test.run(), @r" + Input Plan: + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=10 + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false] + SortPreservingMergeExec: [nullable_col@0 ASC, non_nullable_col@1 ASC] + SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false] + SortPreservingMergeExec: [non_nullable_col@1 ASC] + SortExec: expr=[non_nullable_col@1 ASC], preserve_partitioning=[false] + DataSourceExec: partitions=1, partition_sizes=[0] + + Optimized Plan: + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=10 + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + DataSourceExec: partitions=1, partition_sizes=[0] + "); Ok(()) } @@ -2180,23 +2130,22 @@ async fn test_remove_unnecessary_sort3() -> Result<()> { // it with a `CoalescePartitionsExec` instead of directly removing it. let test = EnforceSortingTest::new(physical_plan.clone()).with_repartition_sorts(true); - assert_snapshot!(test.run(), @r#" - let expected_input = [ - "AggregateExec: mode=Final, gby=[], aggr=[]", - " SortPreservingMergeExec: [nullable_col@0 ASC, non_nullable_col@1 ASC]", - " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[true]", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " SortPreservingMergeExec: [non_nullable_col@1 ASC]", - " SortExec: expr=[non_nullable_col@1 ASC], preserve_partitioning=[false]", - " DataSourceExec: partitions=1, partition_sizes=[0]", - ]; - let expected_optimized = [ - "AggregateExec: mode=Final, gby=[], aggr=[]", - " CoalescePartitionsExec", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: partitions=1, partition_sizes=[0]", - ]; - "#); + assert_snapshot!(test.run(), @r" + Input Plan: + AggregateExec: mode=Final, gby=[], aggr=[] + SortPreservingMergeExec: [nullable_col@0 ASC, non_nullable_col@1 ASC] + SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[true] + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + SortPreservingMergeExec: [non_nullable_col@1 ASC] + SortExec: expr=[non_nullable_col@1 ASC], preserve_partitioning=[false] + DataSourceExec: partitions=1, partition_sizes=[0] + + Optimized Plan: + AggregateExec: mode=Final, gby=[], aggr=[] + CoalescePartitionsExec + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + DataSourceExec: partitions=1, partition_sizes=[0] + "); Ok(()) } @@ -2228,29 +2177,28 @@ async fn test_remove_unnecessary_sort4() -> Result<()> { // it with a `CoalescePartitionsExec` instead of directly removing it. 
let test = EnforceSortingTest::new(physical_plan.clone()).with_repartition_sorts(true); - assert_snapshot!(test.run(), @r#" - let expected_input = [ - "SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]", - " FilterExec: NOT non_nullable_col@1", - " SortPreservingMergeExec: [non_nullable_col@1 ASC]", - " SortExec: expr=[non_nullable_col@1 ASC], preserve_partitioning=[true]", - " UnionExec", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: partitions=1, partition_sizes=[0]", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: partitions=1, partition_sizes=[0]", - ]; - let expected_optimized = [ - "SortPreservingMergeExec: [nullable_col@0 ASC, non_nullable_col@1 ASC]", - " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[true]", - " FilterExec: NOT non_nullable_col@1", - " UnionExec", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: partitions=1, partition_sizes=[0]", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: partitions=1, partition_sizes=[0]", - ]; - "#); + assert_snapshot!(test.run(), @r" + Input Plan: + SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false] + FilterExec: NOT non_nullable_col@1 + SortPreservingMergeExec: [non_nullable_col@1 ASC] + SortExec: expr=[non_nullable_col@1 ASC], preserve_partitioning=[true] + UnionExec + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + DataSourceExec: partitions=1, partition_sizes=[0] + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + DataSourceExec: partitions=1, partition_sizes=[0] + + Optimized Plan: + SortPreservingMergeExec: [nullable_col@0 ASC, non_nullable_col@1 ASC] + SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[true] + FilterExec: NOT non_nullable_col@1 + UnionExec + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + DataSourceExec: partitions=1, partition_sizes=[0] + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + DataSourceExec: partitions=1, partition_sizes=[0] + "); Ok(()) } @@ -2274,17 +2222,16 @@ async fn test_remove_unnecessary_sort6() -> Result<()> { ); let test = EnforceSortingTest::new(physical_plan.clone()).with_repartition_sorts(true); - assert_snapshot!(test.run(), @r#" - let expected_input = [ - "SortExec: expr=[non_nullable_col@1 ASC, nullable_col@0 ASC], preserve_partitioning=[false]", - " SortExec: TopK(fetch=2), expr=[non_nullable_col@1 ASC], preserve_partitioning=[false]", - " DataSourceExec: partitions=1, partition_sizes=[0]", - ]; - let expected_optimized = [ - "SortExec: TopK(fetch=2), expr=[non_nullable_col@1 ASC, nullable_col@0 ASC], preserve_partitioning=[false]", - " DataSourceExec: partitions=1, partition_sizes=[0]", - ]; - "#); + assert_snapshot!(test.run(), @r" + Input Plan: + SortExec: expr=[non_nullable_col@1 ASC, nullable_col@0 ASC], preserve_partitioning=[false] + SortExec: TopK(fetch=2), expr=[non_nullable_col@1 ASC], preserve_partitioning=[false] + DataSourceExec: partitions=1, partition_sizes=[0] + + Optimized Plan: + SortExec: TopK(fetch=2), expr=[non_nullable_col@1 ASC, nullable_col@0 ASC], preserve_partitioning=[false] + DataSourceExec: partitions=1, partition_sizes=[0] + "); Ok(()) } @@ -2309,18 +2256,17 @@ async fn test_remove_unnecessary_sort7() -> Result<()> { let test = 
EnforceSortingTest::new(physical_plan.clone()).with_repartition_sorts(true); - assert_snapshot!(test.run(), @r#" - let expected_input = [ - "SortExec: TopK(fetch=2), expr=[non_nullable_col@1 ASC], preserve_partitioning=[false], sort_prefix=[non_nullable_col@1 ASC]", - " SortExec: expr=[non_nullable_col@1 ASC, nullable_col@0 ASC], preserve_partitioning=[false]", - " DataSourceExec: partitions=1, partition_sizes=[0]", - ]; - let expected_optimized = [ - "GlobalLimitExec: skip=0, fetch=2", - " SortExec: expr=[non_nullable_col@1 ASC, nullable_col@0 ASC], preserve_partitioning=[false]", - " DataSourceExec: partitions=1, partition_sizes=[0]", - ]; - "#); + assert_snapshot!(test.run(), @r" + Input Plan: + SortExec: TopK(fetch=2), expr=[non_nullable_col@1 ASC], preserve_partitioning=[false], sort_prefix=[non_nullable_col@1 ASC] + SortExec: expr=[non_nullable_col@1 ASC, nullable_col@0 ASC], preserve_partitioning=[false] + DataSourceExec: partitions=1, partition_sizes=[0] + + Optimized Plan: + GlobalLimitExec: skip=0, fetch=2 + SortExec: expr=[non_nullable_col@1 ASC, nullable_col@0 ASC], preserve_partitioning=[false] + DataSourceExec: partitions=1, partition_sizes=[0] + "); Ok(()) } @@ -2342,19 +2288,18 @@ async fn test_remove_unnecessary_sort8() -> Result<()> { let test = EnforceSortingTest::new(physical_plan.clone()).with_repartition_sorts(true); - assert_snapshot!(test.run(), @r#" - let expected_input = [ - "SortExec: expr=[non_nullable_col@1 ASC, nullable_col@0 ASC], preserve_partitioning=[false]", - " LocalLimitExec: fetch=2", - " SortExec: expr=[non_nullable_col@1 ASC], preserve_partitioning=[false]", - " DataSourceExec: partitions=1, partition_sizes=[0]", - ]; - let expected_optimized = [ - "LocalLimitExec: fetch=2", - " SortExec: TopK(fetch=2), expr=[non_nullable_col@1 ASC, nullable_col@0 ASC], preserve_partitioning=[false]", - " DataSourceExec: partitions=1, partition_sizes=[0]", - ]; - "#); + assert_snapshot!(test.run(), @r" + Input Plan: + SortExec: expr=[non_nullable_col@1 ASC, nullable_col@0 ASC], preserve_partitioning=[false] + LocalLimitExec: fetch=2 + SortExec: expr=[non_nullable_col@1 ASC], preserve_partitioning=[false] + DataSourceExec: partitions=1, partition_sizes=[0] + + Optimized Plan: + LocalLimitExec: fetch=2 + SortExec: TopK(fetch=2), expr=[non_nullable_col@1 ASC, nullable_col@0 ASC], preserve_partitioning=[false] + DataSourceExec: partitions=1, partition_sizes=[0] + "); Ok(()) } @@ -2369,20 +2314,19 @@ async fn test_do_not_pushdown_through_limit() -> Result<()> { let test = EnforceSortingTest::new(physical_plan.clone()).with_repartition_sorts(true); - assert_snapshot!(test.run(), @r#" - let expected_input = [ - "SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", - " GlobalLimitExec: skip=0, fetch=5", - " SortExec: expr=[non_nullable_col@1 ASC], preserve_partitioning=[false]", - " DataSourceExec: partitions=1, partition_sizes=[0]", - ]; - let expected_optimized = [ - "SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", - " GlobalLimitExec: skip=0, fetch=5", - " SortExec: expr=[non_nullable_col@1 ASC], preserve_partitioning=[false]", - " DataSourceExec: partitions=1, partition_sizes=[0]", - ]; - "#); + assert_snapshot!(test.run(), @r" + Input Plan: + SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false] + GlobalLimitExec: skip=0, fetch=5 + SortExec: expr=[non_nullable_col@1 ASC], preserve_partitioning=[false] + DataSourceExec: partitions=1, partition_sizes=[0] + + Optimized Plan: + SortExec: expr=[nullable_col@0 ASC], 
preserve_partitioning=[false] + GlobalLimitExec: skip=0, fetch=5 + SortExec: expr=[non_nullable_col@1 ASC], preserve_partitioning=[false] + DataSourceExec: partitions=1, partition_sizes=[0] + "); Ok(()) } @@ -2399,18 +2343,17 @@ async fn test_remove_unnecessary_spm1() -> Result<()> { let test = EnforceSortingTest::new(physical_plan.clone()).with_repartition_sorts(true); - assert_snapshot!(test.run(), @r#" - let expected_input = [ - "SortPreservingMergeExec: [nullable_col@0 ASC]", - " SortPreservingMergeExec: [non_nullable_col@1 ASC]", - " SortPreservingMergeExec: [non_nullable_col@1 ASC]", - " DataSourceExec: partitions=1, partition_sizes=[0]", - ]; - let expected_optimized = [ - "SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", - " DataSourceExec: partitions=1, partition_sizes=[0]", - ]; - "#); + assert_snapshot!(test.run(), @r" + Input Plan: + SortPreservingMergeExec: [nullable_col@0 ASC] + SortPreservingMergeExec: [non_nullable_col@1 ASC] + SortPreservingMergeExec: [non_nullable_col@1 ASC] + DataSourceExec: partitions=1, partition_sizes=[0] + + Optimized Plan: + SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false] + DataSourceExec: partitions=1, partition_sizes=[0] + "); Ok(()) } @@ -2426,17 +2369,16 @@ async fn test_remove_unnecessary_spm2() -> Result<()> { ); let test = EnforceSortingTest::new(input.clone()).with_repartition_sorts(true); - assert_snapshot!(test.run(), @r#" - let expected_input = [ - "SortPreservingMergeExec: [non_nullable_col@1 ASC], fetch=100", - " DataSourceExec: partitions=1, partition_sizes=[0]", - ]; - let expected_optimized = [ - "LocalLimitExec: fetch=100", - " SortExec: expr=[non_nullable_col@1 ASC], preserve_partitioning=[false]", - " DataSourceExec: partitions=1, partition_sizes=[0]", - ]; - "#); + assert_snapshot!(test.run(), @r" + Input Plan: + SortPreservingMergeExec: [non_nullable_col@1 ASC], fetch=100 + DataSourceExec: partitions=1, partition_sizes=[0] + + Optimized Plan: + LocalLimitExec: fetch=100 + SortExec: expr=[non_nullable_col@1 ASC], preserve_partitioning=[false] + DataSourceExec: partitions=1, partition_sizes=[0] + "); Ok(()) } @@ -2454,17 +2396,16 @@ async fn test_change_wrong_sorting() -> Result<()> { let test = EnforceSortingTest::new(physical_plan.clone()).with_repartition_sorts(true); - assert_snapshot!(test.run(), @r#" - let expected_input = [ - "SortPreservingMergeExec: [nullable_col@0 ASC, non_nullable_col@1 ASC]", - " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", - " DataSourceExec: partitions=1, partition_sizes=[0]", - ]; - let expected_optimized = [ - "SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]", - " DataSourceExec: partitions=1, partition_sizes=[0]", - ]; - "#); + assert_snapshot!(test.run(), @r" + Input Plan: + SortPreservingMergeExec: [nullable_col@0 ASC, non_nullable_col@1 ASC] + SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false] + DataSourceExec: partitions=1, partition_sizes=[0] + + Optimized Plan: + SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false] + DataSourceExec: partitions=1, partition_sizes=[0] + "); Ok(()) } @@ -2482,18 +2423,17 @@ async fn test_change_wrong_sorting2() -> Result<()> { let physical_plan = sort_preserving_merge_exec([sort_exprs[1].clone()].into(), sort2); let test = EnforceSortingTest::new(physical_plan.clone()).with_repartition_sorts(true); - assert_snapshot!(test.run(), @r#" - let expected_input = [ - "SortPreservingMergeExec: 
[non_nullable_col@1 ASC]", - " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", - " SortPreservingMergeExec: [nullable_col@0 ASC, non_nullable_col@1 ASC]", - " DataSourceExec: partitions=1, partition_sizes=[0]", - ]; - let expected_optimized = [ - "SortExec: expr=[non_nullable_col@1 ASC], preserve_partitioning=[false]", - " DataSourceExec: partitions=1, partition_sizes=[0]", - ]; - "#); + assert_snapshot!(test.run(), @r" + Input Plan: + SortPreservingMergeExec: [non_nullable_col@1 ASC] + SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false] + SortPreservingMergeExec: [nullable_col@0 ASC, non_nullable_col@1 ASC] + DataSourceExec: partitions=1, partition_sizes=[0] + + Optimized Plan: + SortExec: expr=[non_nullable_col@1 ASC], preserve_partitioning=[false] + DataSourceExec: partitions=1, partition_sizes=[0] + "); Ok(()) } @@ -2515,21 +2455,20 @@ async fn test_multiple_sort_window_exec() -> Result<()> { let test = EnforceSortingTest::new(physical_plan.clone()).with_repartition_sorts(true); assert_snapshot!(test.run(), @r#" - let expected_input = [ - "BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", - " BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", - " BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", - " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", - " DataSourceExec: partitions=1, partition_sizes=[0]", - ]; - let expected_optimized = [ - "BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", - " BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", - " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", - " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", - " DataSourceExec: partitions=1, partition_sizes=[0]", - ]; + Input Plan: + BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + SortExec: expr=[nullable_col@0 ASC], 
preserve_partitioning=[false] + DataSourceExec: partitions=1, partition_sizes=[0] + + Optimized Plan: + BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false] + DataSourceExec: partitions=1, partition_sizes=[0] "#); Ok(()) @@ -2600,23 +2539,22 @@ async fn test_coalesce_propagate() -> Result<()> { // Also input plan is not valid as it is. We need to add SortExec before SortPreservingMergeExec. let test = EnforceSortingTest::new(physical_plan.clone()).with_repartition_sorts(true); - assert_snapshot!(test.run(), @r#" - let expected_input = [ - "SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", - " SortPreservingMergeExec: [nullable_col@0 ASC]", - " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[true]", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " CoalescePartitionsExec", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: partitions=1, partition_sizes=[0]", - ]; - let expected_optimized = [ - "SortPreservingMergeExec: [nullable_col@0 ASC]", - " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[true]", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: partitions=1, partition_sizes=[0]", - ]; - "#); + assert_snapshot!(test.run(), @r" + Input Plan: + SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false] + SortPreservingMergeExec: [nullable_col@0 ASC] + SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[true] + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + CoalescePartitionsExec + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + DataSourceExec: partitions=1, partition_sizes=[0] + + Optimized Plan: + SortPreservingMergeExec: [nullable_col@0 ASC] + SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[true] + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + DataSourceExec: partitions=1, partition_sizes=[0] + "); Ok(()) } @@ -2637,16 +2575,15 @@ async fn test_replace_with_partial_sort2() -> Result<()> { ); let test = EnforceSortingTest::new(physical_plan.clone()).with_repartition_sorts(true); - assert_snapshot!(test.run(), @r#" - let expected_input = [ - "SortExec: expr=[a@0 ASC, c@2 ASC, d@3 ASC], preserve_partitioning=[false]", - " StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[a@0 ASC, c@2 ASC]", - ]; - let expected_optimized = [ - "PartialSortExec: expr=[a@0 ASC, c@2 ASC, d@3 ASC], common_prefix_length=[2]", - " StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[a@0 ASC, c@2 ASC]", - ]; - "#); + assert_snapshot!(test.run(), @r" + Input Plan: + SortExec: expr=[a@0 ASC, c@2 
ASC, d@3 ASC], preserve_partitioning=[false]
+      StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[a@0 ASC, c@2 ASC]
+
+    Optimized Plan:
+      PartialSortExec: expr=[a@0 ASC, c@2 ASC, d@3 ASC], common_prefix_length=[2]
+      StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[a@0 ASC, c@2 ASC]
+    ");
 
     Ok(())
 }
 
@@ -2667,20 +2604,13 @@ async fn test_push_with_required_input_ordering_prohibited() -> Result<()> {
         .with_repartition_sorts(true)
         // should not be able to push sorts
         .with_expect_no_change(true);
-    assert_snapshot!(test.run(), @r#"
-    let expected_input = [
-        "SortExec: expr=[b@1 ASC], preserve_partitioning=[false]",
-        "  RequiredInputOrderingExec",
-        "    SortExec: expr=[a@0 ASC], preserve_partitioning=[false]",
-        "      DataSourceExec: partitions=1, partition_sizes=[0]",
-    ];
-    let expected_optimized = [
-        "SortExec: expr=[b@1 ASC], preserve_partitioning=[false]",
-        "  RequiredInputOrderingExec",
-        "    SortExec: expr=[a@0 ASC], preserve_partitioning=[false]",
-        "      DataSourceExec: partitions=1, partition_sizes=[0]",
-    ];
-    "#);
+    assert_snapshot!(test.run(), @r"
+    Input / Optimized Plan:
+      SortExec: expr=[b@1 ASC], preserve_partitioning=[false]
+        RequiredInputOrderingExec
+          SortExec: expr=[a@0 ASC], preserve_partitioning=[false]
+            DataSourceExec: partitions=1, partition_sizes=[0]
+    ");
 
     Ok(())
 }
 
@@ -2709,20 +2639,19 @@ async fn test_push_with_required_input_ordering_allowed() -> Result<()> {
     let test = EnforceSortingTest::new(plan.clone())
         .with_repartition_sorts(true)
         .with_expected_description("// Should be able to push down");
-    assert_snapshot!(test.run(), @r#"
-    let expected_input = [
-        "SortExec: expr=[a@0 ASC, b@1 ASC], preserve_partitioning=[false]",
-        "  RequiredInputOrderingExec",
-        "    SortExec: expr=[a@0 ASC], preserve_partitioning=[false]",
-        "      DataSourceExec: partitions=1, partition_sizes=[0]",
-    ];
+    assert_snapshot!(test.run(), @r"
+    Input Plan:
+      SortExec: expr=[a@0 ASC, b@1 ASC], preserve_partitioning=[false]
+        RequiredInputOrderingExec
+          SortExec: expr=[a@0 ASC], preserve_partitioning=[false]
+            DataSourceExec: partitions=1, partition_sizes=[0]
+
     // Should be able to push down
-    let expected_optimized = [
-        "RequiredInputOrderingExec",
-        "  SortExec: expr=[a@0 ASC, b@1 ASC], preserve_partitioning=[false]",
-        "    DataSourceExec: partitions=1, partition_sizes=[0]",
-    ];
-    "#);
+    Optimized Plan:
+      RequiredInputOrderingExec
+        SortExec: expr=[a@0 ASC, b@1 ASC], preserve_partitioning=[false]
+          DataSourceExec: partitions=1, partition_sizes=[0]
+    ");
 
     Ok(())
 }
 
@@ -2738,16 +2667,15 @@ async fn test_replace_with_partial_sort() -> Result<()> {
     let test =
         EnforceSortingTest::new(physical_plan.clone()).with_repartition_sorts(true);
-    assert_snapshot!(test.run(), @r#"
-    let expected_input = [
-        "SortExec: expr=[a@0 ASC, c@2 ASC], preserve_partitioning=[false]",
-        "  StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[a@0 ASC]",
-    ];
-    let expected_optimized = [
-        "PartialSortExec: expr=[a@0 ASC, c@2 ASC], common_prefix_length=[1]",
-        "  StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[a@0 ASC]",
-    ];
-    "#);
+    assert_snapshot!(test.run(), @r"
+    Input Plan:
+      SortExec: expr=[a@0 ASC, c@2 ASC], preserve_partitioning=[false]
+        StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[a@0 ASC]
+
+    Optimized Plan:
+      PartialSortExec: expr=[a@0 ASC, c@2
ASC], common_prefix_length=[1] + StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[a@0 ASC] + "); Ok(()) } @@ -2768,16 +2696,11 @@ async fn test_not_replaced_with_partial_sort_for_unbounded_input() -> Result<()> let test = EnforceSortingTest::new(physical_plan.clone()) .with_repartition_sorts(true) .with_expect_no_change(true); - assert_snapshot!(test.run(), @r#" - let expected_input = [ - "SortExec: expr=[a@0 ASC, b@1 ASC, c@2 ASC], preserve_partitioning=[false]", - " StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[b@1 ASC, c@2 ASC]", - ]; - let expected_optimized = [ - "SortExec: expr=[a@0 ASC, b@1 ASC, c@2 ASC], preserve_partitioning=[false]", - " StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[b@1 ASC, c@2 ASC]", - ]; - "#); + assert_snapshot!(test.run(), @r" + Input / Optimized Plan: + SortExec: expr=[a@0 ASC, b@1 ASC, c@2 ASC], preserve_partitioning=[false] + StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[b@1 ASC, c@2 ASC] + "); Ok(()) } // aal here @@ -4064,17 +3987,16 @@ fn test_removes_unused_orthogonal_sort() -> Result<()> { "// Test: should remove orthogonal sort, and the uppermost (unneeded) sort:", ); - assert_snapshot!(test.run(), @r#" - let expected_input = [ - "SortExec: expr=[b@1 ASC, c@2 ASC], preserve_partitioning=[false]", - " SortExec: expr=[a@0 ASC], preserve_partitioning=[false]", - " StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[b@1 ASC, c@2 ASC]", - ]; + assert_snapshot!(test.run(), @r" + Input Plan: + SortExec: expr=[b@1 ASC, c@2 ASC], preserve_partitioning=[false] + SortExec: expr=[a@0 ASC], preserve_partitioning=[false] + StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[b@1 ASC, c@2 ASC] + // Test: should remove orthogonal sort, and the uppermost (unneeded) sort: - let expected_optimized = [ - "StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[b@1 ASC, c@2 ASC]", - ]; - "#); + Optimized Plan: + StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[b@1 ASC, c@2 ASC] + "); Ok(()) } @@ -4095,19 +4017,18 @@ fn test_keeps_used_orthogonal_sort() -> Result<()> { .with_expected_description( "// Test: should keep the orthogonal sort, since it modifies the output:", ); - assert_snapshot!(test.run(), @r#" - let expected_input = [ - "SortExec: expr=[b@1 ASC, c@2 ASC], preserve_partitioning=[false]", - " SortExec: TopK(fetch=3), expr=[a@0 ASC], preserve_partitioning=[false]", - " StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[b@1 ASC, c@2 ASC]", - ]; + assert_snapshot!(test.run(), @r" + Input Plan: + SortExec: expr=[b@1 ASC, c@2 ASC], preserve_partitioning=[false] + SortExec: TopK(fetch=3), expr=[a@0 ASC], preserve_partitioning=[false] + StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[b@1 ASC, c@2 ASC] + // Test: should keep the orthogonal sort, since it modifies the output: - let expected_optimized = [ - "SortExec: expr=[b@1 ASC, c@2 ASC], preserve_partitioning=[false]", - " SortExec: TopK(fetch=3), expr=[a@0 ASC], preserve_partitioning=[false]", - " StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, 
e], infinite_source=true, output_ordering=[b@1 ASC, c@2 ASC]", - ]; - "#); + Optimized Plan: + SortExec: expr=[b@1 ASC, c@2 ASC], preserve_partitioning=[false] + SortExec: TopK(fetch=3), expr=[a@0 ASC], preserve_partitioning=[false] + StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[b@1 ASC, c@2 ASC] + "); Ok(()) } @@ -4130,22 +4051,21 @@ fn test_handles_multiple_orthogonal_sorts() -> Result<()> { // Test scenario/input has an orthogonal sort: let test = EnforceSortingTest::new(output_sort.clone()).with_repartition_sorts(true) .with_expected_description("// Test: should keep only the needed orthogonal sort, and remove the unneeded ones:"); - assert_snapshot!(test.run(), @r#" - let expected_input = [ - "SortExec: expr=[b@1 ASC, c@2 ASC], preserve_partitioning=[false]", - " SortExec: expr=[a@0 ASC], preserve_partitioning=[false]", - " SortExec: expr=[c@2 ASC], preserve_partitioning=[false]", - " SortExec: TopK(fetch=3), expr=[a@0 ASC], preserve_partitioning=[false]", - " SortExec: expr=[c@2 ASC], preserve_partitioning=[false]", - " StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[b@1 ASC, c@2 ASC]", - ]; + assert_snapshot!(test.run(), @r" + Input Plan: + SortExec: expr=[b@1 ASC, c@2 ASC], preserve_partitioning=[false] + SortExec: expr=[a@0 ASC], preserve_partitioning=[false] + SortExec: expr=[c@2 ASC], preserve_partitioning=[false] + SortExec: TopK(fetch=3), expr=[a@0 ASC], preserve_partitioning=[false] + SortExec: expr=[c@2 ASC], preserve_partitioning=[false] + StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[b@1 ASC, c@2 ASC] + // Test: should keep only the needed orthogonal sort, and remove the unneeded ones: - let expected_optimized = [ - "SortExec: expr=[b@1 ASC, c@2 ASC], preserve_partitioning=[false]", - " SortExec: TopK(fetch=3), expr=[a@0 ASC], preserve_partitioning=[false]", - " StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[b@1 ASC, c@2 ASC]", - ]; - "#); + Optimized Plan: + SortExec: expr=[b@1 ASC, c@2 ASC], preserve_partitioning=[false] + SortExec: TopK(fetch=3), expr=[a@0 ASC], preserve_partitioning=[false] + StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[b@1 ASC, c@2 ASC] + "); Ok(()) } From 9739758b6e7497fd928a6013024a239d2a3fee00 Mon Sep 17 00:00:00 2001 From: blaginin Date: Sat, 4 Oct 2025 19:18:14 +0100 Subject: [PATCH 05/14] migrate `test_sort_merge_join_order_by_left` --- .../physical_optimizer/enforce_sorting.rs | 75 ++++++++++++------- 1 file changed, 48 insertions(+), 27 deletions(-) diff --git a/datafusion/core/tests/physical_optimizer/enforce_sorting.rs b/datafusion/core/tests/physical_optimizer/enforce_sorting.rs index 424e79f1ed12d..e6938de45837c 100644 --- a/datafusion/core/tests/physical_optimizer/enforce_sorting.rs +++ b/datafusion/core/tests/physical_optimizer/enforce_sorting.rs @@ -70,7 +70,7 @@ use datafusion_execution::TaskContext; use datafusion_catalog::streaming::StreamingTable; use futures::StreamExt; -use insta::assert_snapshot; +use insta::{assert_snapshot, Settings}; use rstest::rstest; /// Create a sorted Csv exec @@ -1356,6 +1356,8 @@ async fn test_sort_merge_join_order_by_left() -> Result<()> { Arc::new(Column::new_with_schema("col_a", &right.schema())?) 
as _, )]; + let settings = Settings::clone_current(); + let join_types = vec![ JoinType::Inner, JoinType::Left, @@ -1374,43 +1376,62 @@ async fn test_sort_merge_join_order_by_left() -> Result<()> { .into(); let physical_plan = sort_preserving_merge_exec(ordering, join); - let join_plan = format!( - "SortMergeJoin: join_type={join_type}, on=[(nullable_col@0, col_a@0)]" - ); - let join_plan2 = format!( - " SortMergeJoin: join_type={join_type}, on=[(nullable_col@0, col_a@0)]" + let test = EnforceSortingTest::new(physical_plan).with_repartition_sorts(true); + + let mut settings = settings.clone(); + + settings.add_filter( + // join_type={} replace with join_type=... to avoid snapshot name issue + format!("join_type={}", join_type).as_str(), + "join_type=...", ); - let expected_input = ["SortPreservingMergeExec: [nullable_col@0 ASC, non_nullable_col@1 ASC]", - join_plan2.as_str(), - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[col_a, col_b], file_type=parquet"]; - let expected_optimized = match join_type { + + insta::allow_duplicates! { + settings.bind( || { + + + match join_type { JoinType::Inner | JoinType::Left | JoinType::LeftSemi | JoinType::LeftAnti => { // can push down the sort requirements and save 1 SortExec - vec![ - join_plan.as_str(), - " SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", - " SortExec: expr=[col_a@0 ASC], preserve_partitioning=[false]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[col_a, col_b], file_type=parquet", - ] + assert_snapshot!(test.run(), @r" + Input Plan: + SortPreservingMergeExec: [nullable_col@0 ASC, non_nullable_col@1 ASC] + SortMergeJoin: join_type=..., on=[(nullable_col@0, col_a@0)] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet + DataSourceExec: file_groups={1 group: [[x]]}, projection=[col_a, col_b], file_type=parquet + + Optimized Plan: + SortMergeJoin: join_type=..., on=[(nullable_col@0, col_a@0)] + SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet + SortExec: expr=[col_a@0 ASC], preserve_partitioning=[false] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[col_a, col_b], file_type=parquet + "); } _ => { // can not push down the sort requirements - vec![ - "SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]", - join_plan2.as_str(), - " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", - " SortExec: expr=[col_a@0 ASC], preserve_partitioning=[false]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[col_a, col_b], file_type=parquet", - ] + assert_snapshot!(test.run(), @r" + Input Plan: + SortPreservingMergeExec: [nullable_col@0 ASC, non_nullable_col@1 ASC] + SortMergeJoin: join_type=..., on=[(nullable_col@0, col_a@0)] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet + DataSourceExec: file_groups={1 group: [[x]]}, projection=[col_a, col_b], file_type=parquet + + Optimized Plan: + 
SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false] + SortMergeJoin: join_type=..., on=[(nullable_col@0, col_a@0)] + SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet + SortExec: expr=[col_a@0 ASC], preserve_partitioning=[false] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[col_a, col_b], file_type=parquet + "); } }; - assert_optimized!(expected_input, expected_optimized, physical_plan, true); + }) + } } Ok(()) } From 93a62a92070e15a37429317cfad07b6c05f0318c Mon Sep 17 00:00:00 2001 From: blaginin Date: Sat, 4 Oct 2025 19:30:03 +0100 Subject: [PATCH 06/14] migrate `test_sort_merge_join_order_by_right` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- .../physical_optimizer/enforce_sorting.rs | 96 +++++++++++++------ 1 file changed, 65 insertions(+), 31 deletions(-) diff --git a/datafusion/core/tests/physical_optimizer/enforce_sorting.rs b/datafusion/core/tests/physical_optimizer/enforce_sorting.rs index e6938de45837c..605ad41bacb48 100644 --- a/datafusion/core/tests/physical_optimizer/enforce_sorting.rs +++ b/datafusion/core/tests/physical_optimizer/enforce_sorting.rs @@ -1450,6 +1450,8 @@ async fn test_sort_merge_join_order_by_right() -> Result<()> { Arc::new(Column::new_with_schema("col_a", &right.schema())?) as _, )]; + let settings = Settings::clone_current(); + let join_types = vec![ JoinType::Inner, JoinType::Left, @@ -1467,44 +1469,76 @@ async fn test_sort_merge_join_order_by_right() -> Result<()> { .into(); let physical_plan = sort_preserving_merge_exec(ordering, join); - let join_plan = format!( - "SortMergeJoin: join_type={join_type}, on=[(nullable_col@0, col_a@0)]" - ); - let spm_plan = match join_type { - JoinType::RightAnti => "SortPreservingMergeExec: [col_a@0 ASC, col_b@1 ASC]", - _ => "SortPreservingMergeExec: [col_a@2 ASC, col_b@3 ASC]", - }; - let join_plan2 = format!( - " SortMergeJoin: join_type={join_type}, on=[(nullable_col@0, col_a@0)]" + let test = EnforceSortingTest::new(physical_plan).with_repartition_sorts(true); + + let mut settings = settings.clone(); + + settings.add_filter( + // join_type={} replace with join_type=... to avoid snapshot name issue + format!("join_type={}", join_type).as_str(), + "join_type=...", ); - let expected_input = [spm_plan, - join_plan2.as_str(), - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[col_a, col_b], file_type=parquet"]; - let expected_optimized = match join_type { - JoinType::Inner | JoinType::Right | JoinType::RightAnti => { + + insta::allow_duplicates! 
{ + settings.bind( || { + + + match join_type { + JoinType::Inner | JoinType::Right => { // can push down the sort requirements and save 1 SortExec - vec![ - join_plan.as_str(), - " SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet", - " SortExec: expr=[col_a@0 ASC, col_b@1 ASC], preserve_partitioning=[false]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[col_a, col_b], file_type=parquet", - ] + assert_snapshot!(test.run(), @r" + Input Plan: + SortPreservingMergeExec: [col_a@2 ASC, col_b@3 ASC] + SortMergeJoin: join_type=..., on=[(nullable_col@0, col_a@0)] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet + DataSourceExec: file_groups={1 group: [[x]]}, projection=[col_a, col_b], file_type=parquet + + Optimized Plan: + SortMergeJoin: join_type=..., on=[(nullable_col@0, col_a@0)] + SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet + SortExec: expr=[col_a@0 ASC, col_b@1 ASC], preserve_partitioning=[false] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[col_a, col_b], file_type=parquet + "); + } + JoinType::RightAnti => { + // can push down the sort requirements and save 1 SortExec + assert_snapshot!(test.run(), @r" + Input Plan: + SortPreservingMergeExec: [col_a@0 ASC, col_b@1 ASC] + SortMergeJoin: join_type=..., on=[(nullable_col@0, col_a@0)] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet + DataSourceExec: file_groups={1 group: [[x]]}, projection=[col_a, col_b], file_type=parquet + + Optimized Plan: + SortMergeJoin: join_type=..., on=[(nullable_col@0, col_a@0)] + SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet + SortExec: expr=[col_a@0 ASC, col_b@1 ASC], preserve_partitioning=[false] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[col_a, col_b], file_type=parquet + "); } _ => { // can not push down the sort requirements for Left and Full join. 
-                vec![
-                    "SortExec: expr=[col_a@2 ASC, col_b@3 ASC], preserve_partitioning=[false]",
-                    join_plan2.as_str(),
-                    "  SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]",
-                    "    DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet",
-                    "  SortExec: expr=[col_a@0 ASC], preserve_partitioning=[false]",
-                    "    DataSourceExec: file_groups={1 group: [[x]]}, projection=[col_a, col_b], file_type=parquet",
-                ]
+                assert_snapshot!(test.run(), @r"
+                Input Plan:
+                  SortPreservingMergeExec: [col_a@2 ASC, col_b@3 ASC]
+                    SortMergeJoin: join_type=..., on=[(nullable_col@0, col_a@0)]
+                      DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet
+                      DataSourceExec: file_groups={1 group: [[x]]}, projection=[col_a, col_b], file_type=parquet
+
+                Optimized Plan:
+                  SortExec: expr=[col_a@2 ASC, col_b@3 ASC], preserve_partitioning=[false]
+                    SortMergeJoin: join_type=..., on=[(nullable_col@0, col_a@0)]
+                      SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]
+                        DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet
+                      SortExec: expr=[col_a@0 ASC], preserve_partitioning=[false]
+                        DataSourceExec: file_groups={1 group: [[x]]}, projection=[col_a, col_b], file_type=parquet
+                ");
             }
         };
-        assert_optimized!(expected_input, expected_optimized, physical_plan, true);
+        })
+        }
     }
     Ok(())
 }

From 10f970a9ec4dd40f6e48c9056fb641c410feae19 Mon Sep 17 00:00:00 2001
From: blaginin
Date: Sat, 4 Oct 2025 19:39:12 +0100
Subject: [PATCH 07/14] migrate `test_with_lost_ordering_unbounded_bounded`
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Split the rstest parameterized test into two separate functions:
- test_with_lost_ordering_unbounded
- test_with_lost_ordering_bounded

Each tests both repartition_sorts modes (false and true) using insta
snapshots. Replaced the incomplete existing test_with_lost_ordering_bounded
which only tested one mode.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude
---
 .../physical_optimizer/enforce_sorting.rs     | 166 ++++++++----------
 1 file changed, 75 insertions(+), 91 deletions(-)

diff --git a/datafusion/core/tests/physical_optimizer/enforce_sorting.rs b/datafusion/core/tests/physical_optimizer/enforce_sorting.rs
index 605ad41bacb48..f197f2852b8a9 100644
--- a/datafusion/core/tests/physical_optimizer/enforce_sorting.rs
+++ b/datafusion/core/tests/physical_optimizer/enforce_sorting.rs
@@ -1658,124 +1658,108 @@ async fn test_multilayer_coalesce_partitions() -> Result<()> {
     Ok(())
 }
 
-#[tokio::test]
-async fn test_with_lost_ordering_bounded() -> Result<()> {
+fn create_lost_ordering_plan(source_unbounded: bool) -> Result<Arc<dyn ExecutionPlan>> {
     let schema = create_test_schema3()?;
     let sort_exprs = [sort_expr("a", &schema)];
-    let source = csv_exec_sorted(&schema, sort_exprs);
+    // create either bounded or unbounded source
+    let source = if source_unbounded {
+        stream_exec_ordered(&schema, sort_exprs.clone().into())
+    } else {
+        csv_exec_sorted(&schema, sort_exprs.clone())
+    };
     let repartition_rr = repartition_exec(source);
     let repartition_hash = Arc::new(RepartitionExec::try_new(
         repartition_rr,
         Partitioning::Hash(vec![col("c", &schema)?], 10),
     )?)
as _; let coalesce_partitions = coalesce_partitions_exec(repartition_hash); - let physical_plan = sort_exec([sort_expr("a", &schema)].into(), coalesce_partitions); + let physical_plan = sort_exec(sort_exprs.into(), coalesce_partitions); + Ok(physical_plan) +} - let test = EnforceSortingTest::new(physical_plan).with_repartition_sorts(true); - assert_snapshot!(test.run(), @r" +#[tokio::test] +async fn test_with_lost_ordering_unbounded() -> Result<()> { + let physical_plan = create_lost_ordering_plan(true)?; + + let test_no_repartition_sorts = + EnforceSortingTest::new(physical_plan.clone()).with_repartition_sorts(false); + + assert_snapshot!(test_no_repartition_sorts.run(), @r" Input Plan: SortExec: expr=[a@0 ASC], preserve_partitioning=[false] CoalescePartitionsExec RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10 RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], file_type=csv, has_header=false + StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[a@0 ASC] Optimized Plan: SortPreservingMergeExec: [a@0 ASC] - SortExec: expr=[a@0 ASC], preserve_partitioning=[true] + RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10, preserve_order=true, sort_exprs=a@0 ASC + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[a@0 ASC] + "); + + let test_with_repartition_sorts = + EnforceSortingTest::new(physical_plan).with_repartition_sorts(true); + + assert_snapshot!(test_with_repartition_sorts.run(), @r" + Input Plan: + SortExec: expr=[a@0 ASC], preserve_partitioning=[false] + CoalescePartitionsExec RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10 RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], file_type=csv, has_header=false + StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[a@0 ASC] + + Optimized Plan: + SortPreservingMergeExec: [a@0 ASC] + RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10, preserve_order=true, sort_exprs=a@0 ASC + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[a@0 ASC] "); + Ok(()) } -#[rstest] #[tokio::test] -async fn test_with_lost_ordering_unbounded_bounded( - #[values(false, true)] source_unbounded: bool, -) -> Result<()> { - let schema = create_test_schema3()?; - let sort_exprs = [sort_expr("a", &schema)]; - // create either bounded or unbounded source - let source = if source_unbounded { - stream_exec_ordered(&schema, sort_exprs.clone().into()) - } else { - csv_exec_sorted(&schema, sort_exprs.clone()) - }; - let repartition_rr = repartition_exec(source); - let repartition_hash = Arc::new(RepartitionExec::try_new( - repartition_rr, - Partitioning::Hash(vec![col("c", &schema)?], 10), - )?) 
as _; - let coalesce_partitions = coalesce_partitions_exec(repartition_hash); - let physical_plan = sort_exec(sort_exprs.into(), coalesce_partitions); +async fn test_with_lost_ordering_bounded() -> Result<()> { + let physical_plan = create_lost_ordering_plan(false)?; - // Expected inputs unbounded and bounded - let expected_input_unbounded = vec![ - "SortExec: expr=[a@0 ASC], preserve_partitioning=[false]", - " CoalescePartitionsExec", - " RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[a@0 ASC]", - ]; - let expected_input_bounded = vec![ - "SortExec: expr=[a@0 ASC], preserve_partitioning=[false]", - " CoalescePartitionsExec", - " RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], file_type=csv, has_header=false", - ]; + let test_no_repartition_sorts = + EnforceSortingTest::new(physical_plan.clone()).with_repartition_sorts(false); - // Expected unbounded result (same for with and without flag) - let expected_optimized_unbounded = vec![ - "SortPreservingMergeExec: [a@0 ASC]", - " RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10, preserve_order=true, sort_exprs=a@0 ASC", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[a@0 ASC]", - ]; + assert_snapshot!(test_no_repartition_sorts.run(), @r" + Input Plan: + SortExec: expr=[a@0 ASC], preserve_partitioning=[false] + CoalescePartitionsExec + RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10 + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], file_type=csv, has_header=false - // Expected bounded results with and without flag - let expected_optimized_bounded = vec![ - "SortExec: expr=[a@0 ASC], preserve_partitioning=[false]", - " CoalescePartitionsExec", - " RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], file_type=csv, has_header=false", - ]; - let expected_optimized_bounded_parallelize_sort = vec![ - "SortPreservingMergeExec: [a@0 ASC]", - " SortExec: expr=[a@0 ASC], preserve_partitioning=[true]", - " RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], file_type=csv, has_header=false", - ]; - let (expected_input, expected_optimized, expected_optimized_sort_parallelize) = - if source_unbounded { - ( - expected_input_unbounded, - expected_optimized_unbounded.clone(), - expected_optimized_unbounded, - ) - } else { - ( - expected_input_bounded, - expected_optimized_bounded, - expected_optimized_bounded_parallelize_sort, - ) - }; - assert_optimized!( - expected_input, - expected_optimized, - physical_plan.clone(), - false - ); - assert_optimized!( - expected_input, - 
expected_optimized_sort_parallelize,
-        physical_plan,
-        true
-    );
     Ok(())
 }

From 214ce0db794efe4a19ac8b42aa8e61909de3913b Mon Sep 17 00:00:00 2001
From: blaginin
Date: Sat, 4 Oct 2025 20:11:06 +0100
Subject: [PATCH 08/14] Start migrating
 `test_window_partial_constant_and_set_monotonicity`

---
 .../physical_optimizer/enforce_sorting.rs     | 107 +++++++++++++++---
 1 file changed, 89 insertions(+), 18 deletions(-)

diff --git a/datafusion/core/tests/physical_optimizer/enforce_sorting.rs b/datafusion/core/tests/physical_optimizer/enforce_sorting.rs
index f197f2852b8a9..cc34530b6b549 100644
--- a/datafusion/core/tests/physical_optimizer/enforce_sorting.rs
+++ b/datafusion/core/tests/physical_optimizer/enforce_sorting.rs
@@ -2758,6 +2758,73 @@ async fn test_window_partial_constant_and_set_monotonicity() -> Result<()> {
     .into();
     let source = parquet_exec_with_sort(input_schema.clone(), vec![ordering]) as _;
 
+    // Macro for testing window function optimization with snapshots
+    macro_rules! test_window_case {
+        (
+            partition_by: $partition_by:expr,
+            window_frame: $window_frame:expr,
+            func: ($func_def:expr, $func_name:expr, $func_args:expr),
+            required_sort: [$($col:expr, $asc:expr, $nulls_first:expr),*],
+            @ $expected:literal
+        ) => {{
+            let partition_by_exprs = if $partition_by {
+                vec![col("nullable_col", &input_schema)?]
+            } else {
+                vec![]
+            };
+
+            let window_expr = create_window_expr(
+                &$func_def,
+                $func_name,
+                &$func_args,
+                &partition_by_exprs,
+                &[],
+                $window_frame,
+                Arc::clone(&input_schema),
+                false,
+                false,
+                None,
+            )?;
+
+            let window_exec = if window_expr.uses_bounded_memory() {
+                Arc::new(BoundedWindowAggExec::try_new(
+                    vec![window_expr],
+                    Arc::clone(&source),
+                    InputOrderMode::Sorted,
+                    $partition_by,
+                )?) as Arc<dyn ExecutionPlan>
+            } else {
+                Arc::new(WindowAggExec::try_new(
+                    vec![window_expr],
+                    Arc::clone(&source),
+                    $partition_by,
+                )?) as Arc<dyn ExecutionPlan>
+            };
+
+            let output_schema = window_exec.schema();
+            let sort_expr = vec![
+                $(
+                    sort_expr_options(
+                        $col,
+                        &output_schema,
+                        SortOptions {
+                            descending: !$asc,
+                            nulls_first: $nulls_first,
+                        },
+                    )
+                ),*
+            ];
+            let ordering = LexOrdering::new(sort_expr).unwrap();
+            let physical_plan = sort_exec(ordering, window_exec);
+
+            let test = EnforceSortingTest::new(physical_plan).with_repartition_sorts(true);
+
+            assert_snapshot!(test.run(), @ $expected);
+
+            Result::<(), datafusion_common::DataFusionError>::Ok(())
+        }};
+    }
+
     // Function definition - Alias of the resulting column - Arguments of the function
     #[derive(Clone)]
     struct WindowFuncParam(WindowFunctionDefinition, String, Vec<Arc<dyn PhysicalExpr>>);
@@ -2803,6 +2870,7 @@ async fn test_window_partial_constant_and_set_monotonicity() -> Result<()> {
         "avg".to_string(),
         function_arg_unordered,
     );
+
     struct TestCase<'a> {
         // Whether window expression has a partition_by expression or not.
         // If it does, it will be on the ordered column -- `nullable_col`.
         partition_by: bool,
         // Whether the frame is unbounded in both directions, or unbounded in
         // only one direction (when set-monotonicity has a meaning), or it is
         // a sliding window.
         window_frame: Arc<WindowFrame>,
         // Function definition - Alias of the resulting column - Arguments of the function
         func: WindowFuncParam,
         // Global sort requirement at the root and its direction,
         // which is required to be removed or preserved -- (asc, nulls_first)
         required_sort_columns: Vec<(&'a str, bool, bool)>,
         initial_plan: Vec<&'a str>,
         expected_plan: Vec<&'a str>,
     }
+
+    // ============================================REGION STARTS============================================
+    // WindowAggExec + Plain(unbounded preceding, unbounded following) + no partition_by + on ordered column
+    // Case 0:
+    test_window_case!(
+        partition_by: false,
+        window_frame: Arc::new(WindowFrame::new(None)),
+        func: (fn_count_on_ordered.0.clone(), fn_count_on_ordered.1.clone(), fn_count_on_ordered.2.clone()),
+        required_sort: ["nullable_col", true, false, "count", true, false],
+        @ r#"
+    Input Plan:
+      SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false]
+        WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }]
+          DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet
+
+    Optimized Plan:
+      WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }]
+        DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet
+    "#
+    )?;
+
     let test_cases = vec![
-        // ============================================REGION STARTS============================================
-        // WindowAggExec + Plain(unbounded preceding, unbounded following) + no partition_by + on ordered column
-        // Case 0:
-        TestCase {
-            partition_by: false,
-            window_frame: Arc::new(WindowFrame::new(None)),
-            func: fn_count_on_ordered.clone(),
-            required_sort_columns: vec![("nullable_col", true, false), ("count", true, false)],
-            initial_plan: vec![
-                "SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false]",
-                "  WindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }]",
-                "
DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - expected_plan: vec![ - "WindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - }, // Case 1: TestCase { partition_by: false, From 209afc033511727c67b82eb6d5ff3162a8eb31b6 Mon Sep 17 00:00:00 2001 From: blaginin Date: Sat, 4 Oct 2025 20:24:03 +0100 Subject: [PATCH 09/14] Migrate the rest `test_window_partial_constant_and_set_monotonicity` --- .../physical_optimizer/enforce_sorting.rs | 2590 ++++++++--------- 1 file changed, 1227 insertions(+), 1363 deletions(-) diff --git a/datafusion/core/tests/physical_optimizer/enforce_sorting.rs b/datafusion/core/tests/physical_optimizer/enforce_sorting.rs index cc34530b6b549..d058395ff9850 100644 --- a/datafusion/core/tests/physical_optimizer/enforce_sorting.rs +++ b/datafusion/core/tests/physical_optimizer/enforce_sorting.rs @@ -226,93 +226,6 @@ impl EnforceSortingTest { } } -/// Runs the sort enforcement optimizer and asserts the plan -/// against the original and expected plans -/// -/// `$EXPECTED_PLAN_LINES`: input plan -/// `$EXPECTED_OPTIMIZED_PLAN_LINES`: optimized plan -/// `$PLAN`: the plan to optimized -/// `REPARTITION_SORTS`: Flag to set `config.options.optimizer.repartition_sorts` option. -/// -macro_rules! assert_optimized { - ($EXPECTED_PLAN_LINES: expr, $EXPECTED_OPTIMIZED_PLAN_LINES: expr, $PLAN: expr, $REPARTITION_SORTS: expr) => { - let mut config = ConfigOptions::new(); - config.optimizer.repartition_sorts = $REPARTITION_SORTS; - - // This file has 4 rules that use tree node, apply these rules as in the - // EnforceSorting::optimize implementation - // After these operations tree nodes should be in a consistent state. - // This code block makes sure that these rules doesn't violate tree node integrity. - { - let plan_requirements = PlanWithCorrespondingSort::new_default($PLAN.clone()); - let adjusted = plan_requirements - .transform_up(ensure_sorting) - .data() - .and_then(check_integrity)?; - // TODO: End state payloads will be checked here. - - let new_plan = if config.optimizer.repartition_sorts { - let plan_with_coalesce_partitions = - PlanWithCorrespondingCoalescePartitions::new_default(adjusted.plan); - let parallel = plan_with_coalesce_partitions - .transform_up(parallelize_sorts) - .data() - .and_then(check_integrity)?; - // TODO: End state payloads will be checked here. - parallel.plan - } else { - adjusted.plan - }; - - let plan_with_pipeline_fixer = OrderPreservationContext::new_default(new_plan); - let updated_plan = plan_with_pipeline_fixer - .transform_up(|plan_with_pipeline_fixer| { - replace_with_order_preserving_variants( - plan_with_pipeline_fixer, - false, - true, - &config, - ) - }) - .data() - .and_then(check_integrity)?; - // TODO: End state payloads will be checked here. - - let mut sort_pushdown = SortPushDown::new_default(updated_plan.plan); - assign_initial_requirements(&mut sort_pushdown); - check_integrity(pushdown_sorts(sort_pushdown)?)?; - // TODO: End state payloads will be checked here. 
- } - - let physical_plan = $PLAN; - let formatted = displayable(physical_plan.as_ref()).indent(true).to_string(); - let actual: Vec<&str> = formatted.trim().lines().collect(); - - let expected_plan_lines: Vec<&str> = $EXPECTED_PLAN_LINES - .iter().map(|s| *s).collect(); - - assert_eq!( - expected_plan_lines, actual, - "\n**Original Plan Mismatch\n\nexpected:\n\n{expected_plan_lines:#?}\nactual:\n\n{actual:#?}\n\n" - ); - - let expected_optimized_lines: Vec<&str> = $EXPECTED_OPTIMIZED_PLAN_LINES - .iter().map(|s| *s).collect(); - - // Run the actual optimizer - let optimized_physical_plan = - EnforceSorting::new().optimize(physical_plan,&config)?; - - // Get string representation of the plan - let actual = get_plan_string(&optimized_physical_plan); - assert_eq!( - expected_optimized_lines, actual, - "\n**Optimized Plan Mismatch\n\nexpected:\n\n{expected_optimized_lines:#?}\nactual:\n\n{actual:#?}\n\n" - ); - - }; -} - #[tokio::test] async fn test_remove_unnecessary_sort5() -> Result<()> { let left_schema = create_test_schema2()?; @@ -1743,7 +1656,8 @@ async fn test_with_lost_ordering_bounded() -> Result<()> { DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], file_type=csv, has_header=false "); - let test_with_repartition_sorts = EnforceSortingTest::new(physical_plan).with_repartition_sorts(true); + let test_with_repartition_sorts = + EnforceSortingTest::new(physical_plan).with_repartition_sorts(true); assert_snapshot!(test_with_repartition_sorts.run(), @r" Input Plan: @@ -1885,94 +1799,6 @@ async fn test_not_replaced_with_partial_sort_for_bounded_input() -> Result<()> { Ok(()) } -/// Runs the sort enforcement optimizer and asserts the plan -/// against the original and expected plans -/// -/// `$EXPECTED_PLAN_LINES`: input plan -/// `$EXPECTED_OPTIMIZED_PLAN_LINES`: optimized plan -/// `$PLAN`: the plan to optimized -/// `REPARTITION_SORTS`: Flag to set `config.options.optimizer.repartition_sorts` option. -/// `$CASE_NUMBER` (optional): The test case number to print on failure. -macro_rules! assert_optimized { - ($EXPECTED_PLAN_LINES: expr, $EXPECTED_OPTIMIZED_PLAN_LINES: expr, $PLAN: expr, $REPARTITION_SORTS: expr $(, $CASE_NUMBER: expr)?) => { - let mut config = ConfigOptions::new(); - config.optimizer.repartition_sorts = $REPARTITION_SORTS; - - // This file has 4 rules that use tree node, apply these rules as in the - // EnforceSorting::optimize implementation - // After these operations tree nodes should be in a consistent state. - // This code block makes sure that these rules doesn't violate tree node integrity. - { - let plan_requirements = PlanWithCorrespondingSort::new_default($PLAN.clone()); - let adjusted = plan_requirements - .transform_up(ensure_sorting) - .data() - .and_then(check_integrity)?; - // TODO: End state payloads will be checked here. - - let new_plan = if config.optimizer.repartition_sorts { - let plan_with_coalesce_partitions = - PlanWithCorrespondingCoalescePartitions::new_default(adjusted.plan); - let parallel = plan_with_coalesce_partitions - .transform_up(parallelize_sorts) - .data() - .and_then(check_integrity)?; - // TODO: End state payloads will be checked here. 
- parallel.plan - } else { - adjusted.plan - }; - - let plan_with_pipeline_fixer = OrderPreservationContext::new_default(new_plan); - let updated_plan = plan_with_pipeline_fixer - .transform_up(|plan_with_pipeline_fixer| { - replace_with_order_preserving_variants( - plan_with_pipeline_fixer, - false, - true, - &config, - ) - }) - .data() - .and_then(check_integrity)?; - // TODO: End state payloads will be checked here. - - let mut sort_pushdown = SortPushDown::new_default(updated_plan.plan); - assign_initial_requirements(&mut sort_pushdown); - check_integrity(pushdown_sorts(sort_pushdown)?)?; - // TODO: End state payloads will be checked here. - } - - let physical_plan = $PLAN; - let formatted = displayable(physical_plan.as_ref()).indent(true).to_string(); - let actual: Vec<&str> = formatted.trim().lines().collect(); - - let expected_plan_lines: Vec<&str> = $EXPECTED_PLAN_LINES - .iter().map(|s| *s).collect(); - - if expected_plan_lines != actual { - $(println!("\n**Original Plan Mismatch in case {}**", $CASE_NUMBER);)? - println!("\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n", expected_plan_lines, actual); - assert_eq!(expected_plan_lines, actual); - } - - let expected_optimized_lines: Vec<&str> = $EXPECTED_OPTIMIZED_PLAN_LINES - .iter().map(|s| *s).collect(); - - // Run the actual optimizer - let optimized_physical_plan = - EnforceSorting::new().optimize(physical_plan, &config)?; - - // Get string representation of the plan - let actual = get_plan_string(&optimized_physical_plan); - if expected_optimized_lines != actual { - $(println!("\n**Optimized Plan Mismatch in case {}**", $CASE_NUMBER);)? - println!("\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n", expected_optimized_lines, actual); - assert_eq!(expected_optimized_lines, actual); - } - }; -} - #[tokio::test] async fn test_remove_unnecessary_sort() -> Result<()> { let schema = create_test_schema()?; @@ -2871,23 +2697,6 @@ async fn test_window_partial_constant_and_set_monotonicity() -> Result<()> { function_arg_unordered, ); - struct TestCase<'a> { - // Whether window expression has a partition_by expression or not. - // If it does, it will be on the ordered column -- `nullable_col`. - partition_by: bool, - // Whether the frame is unbounded in both directions, or unbounded in - // only one direction (when set-monotonicity has a meaning), or it is - // a sliding window. 
- window_frame: Arc, - // Function definition - Alias of the resulting column - Arguments of the function - func: WindowFuncParam, - // Global sort requirement at the root and its direction, - // which is required to be removed or preserved -- (asc, nulls_first) - required_sort_columns: Vec<(&'a str, bool, bool)>, - initial_plan: Vec<&'a str>, - expected_plan: Vec<&'a str>, - } - // ============================================REGION STARTS============================================ // WindowAggExec + Plain(unbounded preceding, unbounded following) + no partition_by + on ordered column // Case 0: @@ -2908,1179 +2717,1234 @@ async fn test_window_partial_constant_and_set_monotonicity() -> Result<()> { "# )?; - let test_cases = vec![ - // Case 1: - TestCase { - partition_by: false, - window_frame: Arc::new(WindowFrame::new(None)), - func: fn_max_on_ordered.clone(), - required_sort_columns: vec![("nullable_col", true, false), ("max", false, false)], - initial_plan: vec![ - "SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 DESC NULLS LAST], preserve_partitioning=[false]", - " WindowAggExec: wdw=[max: Ok(Field { name: \"max\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - expected_plan: vec![ - "WindowAggExec: wdw=[max: Ok(Field { name: \"max\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - }, - // Case 2: - TestCase { - partition_by: false, - window_frame: Arc::new(WindowFrame::new(None)), - func: fn_min_on_ordered.clone(), - required_sort_columns: vec![("min", false, false), ("nullable_col", true, false)], - initial_plan: vec![ - "SortExec: expr=[min@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]", - " WindowAggExec: wdw=[min: Ok(Field { name: \"min\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - expected_plan: vec![ - "WindowAggExec: wdw=[min: Ok(Field { name: \"min\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - }, - // Case 3: - TestCase { - partition_by: false, - window_frame: Arc::new(WindowFrame::new(None)), - func: fn_avg_on_ordered.clone(), - required_sort_columns: vec![("avg", true, false), ("nullable_col", true, false)], - initial_plan: vec![ - "SortExec: expr=[avg@2 ASC NULLS LAST, nullable_col@0 ASC 
NULLS LAST], preserve_partitioning=[false]", - " WindowAggExec: wdw=[avg: Ok(Field { name: \"avg\", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - expected_plan: vec![ - "WindowAggExec: wdw=[avg: Ok(Field { name: \"avg\", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - }, - // =============================================REGION ENDS============================================= - // = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = - // ============================================REGION STARTS============================================ - // WindowAggExec + Plain(unbounded preceding, unbounded following) + no partition_by + on unordered column - // Case 4: - TestCase { - partition_by: false, - window_frame: Arc::new(WindowFrame::new(None)), - func: fn_count_on_unordered.clone(), - required_sort_columns: vec![("non_nullable_col", true, false), ("count", true, false)], - initial_plan: vec![ - "SortExec: expr=[non_nullable_col@1 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false]", - " WindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - expected_plan: vec![ - "SortExec: expr=[non_nullable_col@1 ASC NULLS LAST], preserve_partitioning=[false]", - " WindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - }, - // Case 5: - TestCase { - partition_by: false, - window_frame: Arc::new(WindowFrame::new(None)), - func: fn_max_on_unordered.clone(), - required_sort_columns: vec![("non_nullable_col", false, false), ("max", false, false)], - initial_plan: vec![ - "SortExec: expr=[non_nullable_col@1 DESC NULLS LAST, max@2 DESC NULLS LAST], preserve_partitioning=[false]", - " WindowAggExec: wdw=[max: Ok(Field { name: \"max\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - 
], - expected_plan: vec![ - "SortExec: expr=[non_nullable_col@1 DESC NULLS LAST], preserve_partitioning=[false]", - " WindowAggExec: wdw=[max: Ok(Field { name: \"max\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - }, - // Case 6: - TestCase { - partition_by: false, - window_frame: Arc::new(WindowFrame::new(None)), - func: fn_min_on_unordered.clone(), - required_sort_columns: vec![("min", true, false), ("non_nullable_col", true, false)], - initial_plan: vec![ - "SortExec: expr=[min@2 ASC NULLS LAST, non_nullable_col@1 ASC NULLS LAST], preserve_partitioning=[false]", - " WindowAggExec: wdw=[min: Ok(Field { name: \"min\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - expected_plan: vec![ - "SortExec: expr=[non_nullable_col@1 ASC NULLS LAST], preserve_partitioning=[false]", - " WindowAggExec: wdw=[min: Ok(Field { name: \"min\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - }, - // Case 7: - TestCase { - partition_by: false, - window_frame: Arc::new(WindowFrame::new(None)), - func: fn_avg_on_unordered.clone(), - required_sort_columns: vec![("avg", false, false), ("nullable_col", false, false)], - initial_plan: vec![ - "SortExec: expr=[avg@2 DESC NULLS LAST, nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]", - " WindowAggExec: wdw=[avg: Ok(Field { name: \"avg\", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - expected_plan: vec![ - "SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false]", - " WindowAggExec: wdw=[avg: Ok(Field { name: \"avg\", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - }, - // =============================================REGION ENDS============================================= - // = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = - // ============================================REGION 
STARTS============================================ - // WindowAggExec + Plain(unbounded preceding, unbounded following) + partition_by + on ordered column - // Case 8: - TestCase { - partition_by: true, - window_frame: Arc::new(WindowFrame::new(None)), - func: fn_count_on_ordered.clone(), - required_sort_columns: vec![("nullable_col", true, false), ("count", true, false)], - initial_plan: vec![ - "SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false]", - " WindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - expected_plan: vec![ - "WindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - }, - // Case 9: - TestCase { - partition_by: true, - window_frame: Arc::new(WindowFrame::new(None)), - func: fn_max_on_ordered.clone(), - required_sort_columns: vec![("nullable_col", true, false), ("max", false, false)], - initial_plan: vec![ - "SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 DESC NULLS LAST], preserve_partitioning=[false]", - " WindowAggExec: wdw=[max: Ok(Field { name: \"max\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - expected_plan: vec![ - "WindowAggExec: wdw=[max: Ok(Field { name: \"max\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - }, - // Case 10: - TestCase { - partition_by: true, - window_frame: Arc::new(WindowFrame::new(None)), - func: fn_min_on_ordered.clone(), - required_sort_columns: vec![("min", false, false), ("nullable_col", true, false)], - initial_plan: vec![ - "SortExec: expr=[min@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]", - " WindowAggExec: wdw=[min: Ok(Field { name: \"min\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - expected_plan: vec![ - "SortExec: expr=[min@2 DESC NULLS LAST, nullable_col@0 ASC 
NULLS LAST], preserve_partitioning=[false]", - " WindowAggExec: wdw=[min: Ok(Field { name: \"min\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - }, - // Case 11: - TestCase { - partition_by: true, - window_frame: Arc::new(WindowFrame::new(None)), - func: fn_avg_on_ordered.clone(), - required_sort_columns: vec![("avg", true, false), ("nullable_col", true, false)], - initial_plan: vec![ - "SortExec: expr=[avg@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]", - " WindowAggExec: wdw=[avg: Ok(Field { name: \"avg\", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - expected_plan: vec![ - "SortExec: expr=[avg@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]", - " WindowAggExec: wdw=[avg: Ok(Field { name: \"avg\", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - }, - // =============================================REGION ENDS============================================= - // = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = - // ============================================REGION STARTS============================================ - // WindowAggExec + Plain(unbounded preceding, unbounded following) + partition_by + on unordered column - // Case 12: - TestCase { - partition_by: true, - window_frame: Arc::new(WindowFrame::new(None)), - func: fn_count_on_unordered.clone(), - required_sort_columns: vec![("non_nullable_col", true, false), ("count", true, false)], - initial_plan: vec![ - "SortExec: expr=[non_nullable_col@1 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false]", - " WindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - expected_plan: vec![ - "SortExec: expr=[non_nullable_col@1 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false]", - " WindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }]", - " DataSourceExec: file_groups={1 group: [[x]]}, 
projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - }, - // Case 13: - TestCase { - partition_by: true, - window_frame: Arc::new(WindowFrame::new(None)), - func: fn_max_on_unordered.clone(), - required_sort_columns: vec![("non_nullable_col", true, false), ("max", false, false)], - initial_plan: vec![ - "SortExec: expr=[non_nullable_col@1 ASC NULLS LAST, max@2 DESC NULLS LAST], preserve_partitioning=[false]", - " WindowAggExec: wdw=[max: Ok(Field { name: \"max\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - expected_plan: vec![ - "SortExec: expr=[non_nullable_col@1 ASC NULLS LAST, max@2 DESC NULLS LAST], preserve_partitioning=[false]", - " WindowAggExec: wdw=[max: Ok(Field { name: \"max\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - }, - // Case 14: - TestCase { - partition_by: true, - window_frame: Arc::new(WindowFrame::new(None)), - func: fn_min_on_unordered.clone(), - required_sort_columns: vec![("min", false, false), ("non_nullable_col", true, false)], - initial_plan: vec![ - "SortExec: expr=[min@2 DESC NULLS LAST, non_nullable_col@1 ASC NULLS LAST], preserve_partitioning=[false]", - " WindowAggExec: wdw=[min: Ok(Field { name: \"min\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - expected_plan: vec![ - "SortExec: expr=[min@2 DESC NULLS LAST, non_nullable_col@1 ASC NULLS LAST], preserve_partitioning=[false]", - " WindowAggExec: wdw=[min: Ok(Field { name: \"min\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - }, - // Case 15: - TestCase { - partition_by: true, - window_frame: Arc::new(WindowFrame::new(None)), - func: fn_avg_on_unordered.clone(), - required_sort_columns: vec![("avg", true, false), ("nullable_col", true, false)], - initial_plan: vec![ - "SortExec: expr=[avg@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]", - " WindowAggExec: wdw=[avg: Ok(Field { name: \"avg\", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }]", - " DataSourceExec: file_groups={1 group: [[x]]}, 
projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - expected_plan: vec![ - "SortExec: expr=[avg@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]", - " WindowAggExec: wdw=[avg: Ok(Field { name: \"avg\", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - }, - // =============================================REGION ENDS============================================= - // = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = - // ============================================REGION STARTS============================================ - // WindowAggExec + Sliding(current row, unbounded following) + no partition_by + on ordered column - // Case 16: - TestCase { - partition_by: false, - window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), - func: fn_count_on_ordered.clone(), - required_sort_columns: vec![("nullable_col", true, false), ("count", false, false)], - initial_plan: vec![ - "SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 DESC NULLS LAST], preserve_partitioning=[false]", - " WindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - expected_plan: vec![ - "WindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - }, - // Case 17: - TestCase { - partition_by: false, - window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), - func: fn_max_on_ordered.clone(), - required_sort_columns: vec![("max", false, true), ("nullable_col", true, false)], - initial_plan: vec![ - "SortExec: expr=[max@2 DESC, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]", - " WindowAggExec: wdw=[max: Ok(Field { name: \"max\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - expected_plan: vec![ - "WindowAggExec: wdw=[max: Ok(Field { name: \"max\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], 
output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - }, - // Case 18: - TestCase { - partition_by: false, - window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), - func: fn_min_on_ordered.clone(), - required_sort_columns: vec![("min", true, true), ("nullable_col", true, false)], - initial_plan: vec![ - "SortExec: expr=[min@2 ASC, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]", - " WindowAggExec: wdw=[min: Ok(Field { name: \"min\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - expected_plan: vec![ - "WindowAggExec: wdw=[min: Ok(Field { name: \"min\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - }, - // Case 19: - TestCase { - partition_by: false, - window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), - func: fn_avg_on_ordered.clone(), - required_sort_columns: vec![("avg", false, false), ("nullable_col", true, false)], - initial_plan: vec![ - "SortExec: expr=[avg@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]", - " WindowAggExec: wdw=[avg: Ok(Field { name: \"avg\", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - expected_plan: vec![ - "SortExec: expr=[avg@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]", - " WindowAggExec: wdw=[avg: Ok(Field { name: \"avg\", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - }, - // =============================================REGION ENDS============================================= - // = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = - // ============================================REGION STARTS============================================ - // WindowAggExec + Sliding(current row, unbounded following) + no partition_by + on unordered column - // Case 20: - TestCase { - partition_by: false, - window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), - func: fn_count_on_unordered.clone(), - required_sort_columns: vec![("nullable_col", true, false), ("count", true, false)], - initial_plan: vec![ - "SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false]", - " WindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: 
false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - expected_plan: vec![ - "SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false]", - " WindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - }, - // Case 21: - TestCase { - partition_by: false, - window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), - func: fn_max_on_unordered.clone(), - required_sort_columns: vec![("nullable_col", true, false), ("max", false, true)], - initial_plan: vec![ - "SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 DESC], preserve_partitioning=[false]", - " WindowAggExec: wdw=[max: Ok(Field { name: \"max\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - expected_plan: vec![ - "WindowAggExec: wdw=[max: Ok(Field { name: \"max\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - }, - // Case 22: - TestCase { - partition_by: false, - window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), - func: fn_min_on_unordered.clone(), - required_sort_columns: vec![("min", true, false), ("nullable_col", true, false)], - initial_plan: vec![ - "SortExec: expr=[min@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]", - " WindowAggExec: wdw=[min: Ok(Field { name: \"min\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - expected_plan: vec![ - "SortExec: expr=[min@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]", - " WindowAggExec: wdw=[min: Ok(Field { name: \"min\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - }, - // Case 23: - TestCase { - partition_by: false, 
- window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), - func: fn_avg_on_unordered.clone(), - required_sort_columns: vec![("avg", false, false), ("nullable_col", true, false)], - initial_plan: vec![ - "SortExec: expr=[avg@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]", - " WindowAggExec: wdw=[avg: Ok(Field { name: \"avg\", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - expected_plan: vec![ - "SortExec: expr=[avg@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]", - " WindowAggExec: wdw=[avg: Ok(Field { name: \"avg\", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - }, - // =============================================REGION ENDS============================================= - // = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = - // ============================================REGION STARTS============================================ - // WindowAggExec + Sliding(current row, unbounded following) + partition_by + on ordered column - // Case 24: - TestCase { - partition_by: true, - window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), - func: fn_count_on_ordered.clone(), - required_sort_columns: vec![("nullable_col", true, false), ("count", false, false)], - initial_plan: vec![ - "SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 DESC NULLS LAST], preserve_partitioning=[false]", - " WindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - expected_plan: vec![ - "WindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - }, - // Case 25: - TestCase { - partition_by: true, - window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), - func: fn_max_on_ordered.clone(), - required_sort_columns: vec![("nullable_col", true, false), ("max", true, false)], - initial_plan: vec![ - "SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 ASC NULLS LAST], preserve_partitioning=[false]", - " WindowAggExec: wdw=[max: Ok(Field { name: \"max\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, 
end_bound: Following(UInt64(NULL)), is_causal: false }]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - expected_plan: vec![ - "SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 ASC NULLS LAST], preserve_partitioning=[false]", - " WindowAggExec: wdw=[max: Ok(Field { name: \"max\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - }, - // Case 26: - TestCase { - partition_by: true, - window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), - func: fn_min_on_ordered.clone(), - required_sort_columns: vec![("min", false, false)], - initial_plan: vec![ - "SortExec: expr=[min@2 DESC NULLS LAST], preserve_partitioning=[false]", - " WindowAggExec: wdw=[min: Ok(Field { name: \"min\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - expected_plan: vec![ - "SortExec: expr=[min@2 DESC NULLS LAST], preserve_partitioning=[false]", - " WindowAggExec: wdw=[min: Ok(Field { name: \"min\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - }, - // Case 27: - TestCase { - partition_by: true, - window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), - func: fn_avg_on_ordered.clone(), - required_sort_columns: vec![("avg", false, false)], - initial_plan: vec![ - "SortExec: expr=[avg@2 DESC NULLS LAST], preserve_partitioning=[false]", - " WindowAggExec: wdw=[avg: Ok(Field { name: \"avg\", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - expected_plan: vec![ - "SortExec: expr=[avg@2 DESC NULLS LAST], preserve_partitioning=[false]", - " WindowAggExec: wdw=[avg: Ok(Field { name: \"avg\", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - }, - // =============================================REGION ENDS============================================= - // = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = - // 
============================================REGION STARTS============================================ - // WindowAggExec + Sliding(current row, unbounded following) + partition_by + on unordered column - // Case 28: - TestCase { - partition_by: true, - window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), - func: fn_count_on_unordered.clone(), - required_sort_columns: vec![("count", false, false), ("nullable_col", true, false)], - initial_plan: vec![ - "SortExec: expr=[count@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]", - " WindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - expected_plan: vec![ - "SortExec: expr=[count@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]", - " WindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet" - ], - }, - // Case 29: - TestCase { - partition_by: true, - window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), - func: fn_max_on_unordered.clone(), - required_sort_columns: vec![("nullable_col", true, false), ("max", false, true)], - initial_plan: vec![ - "SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 DESC], preserve_partitioning=[false]", - " WindowAggExec: wdw=[max: Ok(Field { name: \"max\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - expected_plan: vec![ - "WindowAggExec: wdw=[max: Ok(Field { name: \"max\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - }, - // Case 30: - TestCase { - partition_by: true, - window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), - func: fn_min_on_unordered.clone(), - required_sort_columns: vec![("min", false, false)], - initial_plan: vec![ - "SortExec: expr=[min@2 DESC NULLS LAST], preserve_partitioning=[false]", - " WindowAggExec: wdw=[min: Ok(Field { name: \"min\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - 
expected_plan: vec![ - "SortExec: expr=[min@2 DESC NULLS LAST], preserve_partitioning=[false]", - " WindowAggExec: wdw=[min: Ok(Field { name: \"min\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - }, - // Case 31: - TestCase { - partition_by: true, - window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), - func: fn_avg_on_unordered.clone(), - required_sort_columns: vec![("nullable_col", true, false), ("avg", true, false)], - initial_plan: vec![ - "SortExec: expr=[nullable_col@0 ASC NULLS LAST, avg@2 ASC NULLS LAST], preserve_partitioning=[false]", - " WindowAggExec: wdw=[avg: Ok(Field { name: \"avg\", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet" - ], - expected_plan: vec![ - "SortExec: expr=[nullable_col@0 ASC NULLS LAST, avg@2 ASC NULLS LAST], preserve_partitioning=[false]", - " WindowAggExec: wdw=[avg: Ok(Field { name: \"avg\", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet" - ], - }, - // =============================================REGION ENDS============================================= - // = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = - // ============================================REGION STARTS============================================ - // BoundedWindowAggExec + Plain(unbounded preceding, unbounded following) + no partition_by + on ordered column - // Case 32: - TestCase { - partition_by: false, - window_frame: Arc::new(WindowFrame::new(Some(true))), - func: fn_count_on_ordered.clone(), - required_sort_columns: vec![("nullable_col", true, false), ("count", true, false)], - initial_plan: vec![ - "SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - expected_plan: vec![ - "BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - }, - // Case 33: - TestCase { - partition_by: false, - window_frame: 
Arc::new(WindowFrame::new(Some(true))), - func: fn_max_on_ordered.clone(), - required_sort_columns: vec![("max", false, false), ("nullable_col", true, false)], - initial_plan: vec![ - "SortExec: expr=[max@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[max: Field { name: \"max\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - expected_plan: vec![ - "SortExec: expr=[max@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[max: Field { name: \"max\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet" - ], - }, - // Case 34: - TestCase { - partition_by: false, - window_frame: Arc::new(WindowFrame::new(Some(true))), - func: fn_min_on_ordered.clone(), - required_sort_columns: vec![("min", false, false), ("nullable_col", true, false)], - initial_plan: vec![ - "SortExec: expr=[min@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[min: Field { name: \"min\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet" - ], - expected_plan: vec![ - "BoundedWindowAggExec: wdw=[min: Field { name: \"min\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - }, - // Case 35: - TestCase { - partition_by: false, - window_frame: Arc::new(WindowFrame::new(Some(true))), - func: fn_avg_on_ordered.clone(), - required_sort_columns: vec![("nullable_col", true, false), ("avg", true, false)], - initial_plan: vec![ - "SortExec: expr=[nullable_col@0 ASC NULLS LAST, avg@2 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[avg: Field { name: \"avg\", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - expected_plan: vec![ - "SortExec: expr=[nullable_col@0 ASC NULLS LAST, avg@2 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[avg: Field { name: \"avg\", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], 
output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - }, - // =============================================REGION ENDS============================================= - // = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = - // ============================================REGION STARTS============================================ - // BoundedWindowAggExec + Plain(unbounded preceding, unbounded following) + no partition_by + on unordered column - // Case 36: - TestCase { - partition_by: false, - window_frame: Arc::new(WindowFrame::new(Some(true))), - func: fn_count_on_unordered.clone(), - required_sort_columns: vec![("nullable_col", true, false), ("count", true, true)], - initial_plan: vec![ - "SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 ASC], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - expected_plan: vec![ - "BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - }, - // Case 37: - TestCase { - partition_by: false, - window_frame: Arc::new(WindowFrame::new(Some(true))), - func: fn_max_on_unordered.clone(), - required_sort_columns: vec![("max", true, false), ("nullable_col", true, false)], - initial_plan: vec![ - "SortExec: expr=[max@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[max: Field { name: \"max\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - expected_plan: vec![ - "BoundedWindowAggExec: wdw=[max: Field { name: \"max\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - }, - // Case 38: - TestCase { - partition_by: false, - window_frame: Arc::new(WindowFrame::new(Some(true))), - func: fn_min_on_unordered.clone(), - required_sort_columns: vec![("min", false, true), ("nullable_col", true, false)], - initial_plan: vec![ - "SortExec: expr=[min@2 DESC, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[min: Field { name: \"min\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], 
file_type=parquet", - ], - expected_plan: vec![ - "SortExec: expr=[min@2 DESC, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[min: Field { name: \"min\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - }, - // Case 39: - TestCase { - partition_by: false, - window_frame: Arc::new(WindowFrame::new(Some(true))), - func: fn_avg_on_unordered.clone(), - required_sort_columns: vec![("avg", true, false)], - initial_plan: vec![ - "SortExec: expr=[avg@2 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[avg: Field { name: \"avg\", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - expected_plan: vec![ - "SortExec: expr=[avg@2 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[avg: Field { name: \"avg\", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - }, - // =============================================REGION ENDS============================================= - // = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = - // ============================================REGION STARTS============================================ - // BoundedWindowAggExec + Plain(unbounded preceding, unbounded following) + partition_by + on ordered column - // Case 40: - TestCase { - partition_by: true, - window_frame: Arc::new(WindowFrame::new(Some(true))), - func: fn_count_on_ordered.clone(), - required_sort_columns: vec![("nullable_col", true, false), ("count", true, false)], - initial_plan: vec![ - "SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - expected_plan: vec![ - "BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - }, - // Case 41: - TestCase { - partition_by: true, - window_frame: Arc::new(WindowFrame::new(Some(true))), - func: fn_max_on_ordered.clone(), - required_sort_columns: vec![("max", true, false), ("nullable_col", true, false)], - initial_plan: 
vec![ - "SortExec: expr=[max@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[max: Field { name: \"max\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet" - ], - expected_plan: vec![ - "SortExec: expr=[max@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[max: Field { name: \"max\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet" - ], - }, - // Case 42: - TestCase { - partition_by: true, - window_frame: Arc::new(WindowFrame::new(Some(true))), - func: fn_min_on_ordered.clone(), - required_sort_columns: vec![("min", false, false), ("nullable_col", true, false)], - initial_plan: vec![ - "SortExec: expr=[min@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[min: Field { name: \"min\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - expected_plan: vec![ - "SortExec: expr=[min@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[min: Field { name: \"min\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - }, - // Case 43: - TestCase { - partition_by: true, - window_frame: Arc::new(WindowFrame::new(Some(true))), - func: fn_avg_on_ordered.clone(), - required_sort_columns: vec![("nullable_col", true, false), ("avg", true, false)], - initial_plan: vec![ - "SortExec: expr=[nullable_col@0 ASC NULLS LAST, avg@2 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[avg: Field { name: \"avg\", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - expected_plan: vec![ - "SortExec: expr=[nullable_col@0 ASC NULLS LAST, avg@2 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[avg: Field { name: \"avg\", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - }, - // 
=============================================REGION ENDS=============================================
-        // = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
-        // ============================================REGION STARTS============================================
-        // BoundedWindowAggExec + Plain(unbounded preceding, unbounded following) + partition_by + on unordered column
-        // Case 44:
-        TestCase {
-            partition_by: true,
-            window_frame: Arc::new(WindowFrame::new(Some(true))),
-            func: fn_count_on_unordered.clone(),
-            required_sort_columns: vec![("count", true, true)],
-            initial_plan: vec![
-                "SortExec: expr=[count@2 ASC], preserve_partitioning=[false]",
-                " BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]",
-                " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet",
-            ],
-            expected_plan: vec![
-                "SortExec: expr=[count@2 ASC], preserve_partitioning=[false]",
-                " BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]",
-                " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet",
-            ],
-        },
-        // Case 45:
-        TestCase {
-            partition_by: true,
-            window_frame: Arc::new(WindowFrame::new(Some(true))),
-            func: fn_max_on_unordered.clone(),
-            required_sort_columns: vec![("nullable_col", true, false), ("max", false, false)],
-            initial_plan: vec![
-                "SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 DESC NULLS LAST], preserve_partitioning=[false]",
-                " BoundedWindowAggExec: wdw=[max: Field { name: \"max\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]",
-                " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet",
-            ],
-            expected_plan: vec![
-                "SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 DESC NULLS LAST], preserve_partitioning=[false]",
-                " BoundedWindowAggExec: wdw=[max: Field { name: \"max\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]",
-                " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet",
-            ],
-        },
-        // Case 46:
-        TestCase {
-            partition_by: true,
-            window_frame: Arc::new(WindowFrame::new(Some(true))),
-            func: fn_min_on_unordered.clone(),
-            required_sort_columns: vec![("nullable_col", true, false), ("min", false, false)],
-            initial_plan: vec![
-                "SortExec: expr=[nullable_col@0 ASC NULLS LAST, min@2 DESC NULLS LAST], preserve_partitioning=[false]",
-                " BoundedWindowAggExec: wdw=[min: Field { name: \"min\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]",
-                " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col],
output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - expected_plan: vec![ - "BoundedWindowAggExec: wdw=[min: Field { name: \"min\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - }, - // Case 47: - TestCase { - partition_by: true, - window_frame: Arc::new(WindowFrame::new(Some(true))), - func: fn_avg_on_unordered.clone(), - required_sort_columns: vec![("nullable_col", true, false)], - initial_plan: vec![ - "SortExec: expr=[nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[avg: Field { name: \"avg\", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - expected_plan: vec![ - "BoundedWindowAggExec: wdw=[avg: Field { name: \"avg\", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - }, - // =============================================REGION ENDS============================================= - // = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = - // ============================================REGION STARTS============================================ - // BoundedWindowAggExec + Sliding(bounded preceding, bounded following) + no partition_by + on ordered column - // Case 48: - TestCase { - partition_by: false, - window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32)?), WindowFrameBound::CurrentRow)), - func: fn_count_on_ordered.clone(), - required_sort_columns: vec![("count", true, false), ("nullable_col", true, false)], - initial_plan: vec![ - "SortExec: expr=[count@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - expected_plan: vec![ - "BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - }, - // Case 49: - TestCase { - partition_by: false, - window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32)?), 
WindowFrameBound::Following(ScalarValue::new_one(&DataType::UInt32)?))), - func: fn_max_on_ordered.clone(), - required_sort_columns: vec![("max", true, false)], - initial_plan: vec![ - "SortExec: expr=[max@2 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[max: Field { name: \"max\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING], mode=[Sorted]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - expected_plan: vec![ - "SortExec: expr=[max@2 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[max: Field { name: \"max\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING], mode=[Sorted]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - }, - // Case 50: - TestCase { - partition_by: false, - window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32)?), WindowFrameBound::CurrentRow)), - func: fn_min_on_ordered.clone(), - required_sort_columns: vec![("nullable_col", true, false), ("min", false, false)], - initial_plan: vec![ - "SortExec: expr=[nullable_col@0 ASC NULLS LAST, min@2 DESC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[min: Field { name: \"min\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - expected_plan: vec![ - "BoundedWindowAggExec: wdw=[min: Field { name: \"min\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - }, - // Case 51: - TestCase { - partition_by: false, - window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32)?), WindowFrameBound::CurrentRow)), - func: fn_avg_on_ordered.clone(), - required_sort_columns: vec![("avg", true, false)], - initial_plan: vec![ - "SortExec: expr=[avg@2 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[avg: Field { name: \"avg\", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - expected_plan: vec![ - "SortExec: expr=[avg@2 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[avg: Field { name: \"avg\", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted]", - " DataSourceExec: file_groups={1 group: [[x]]}, 
projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - }, - // =============================================REGION ENDS============================================= - // = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = - // ============================================REGION STARTS============================================ - // BoundedWindowAggExec + Sliding(bounded preceding, bounded following) + no partition_by + on unordered column - // Case 52: - TestCase { - partition_by: false, - window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32)?), WindowFrameBound::Following(ScalarValue::new_one(&DataType::UInt32)?))), - func: fn_count_on_unordered.clone(), - required_sort_columns: vec![("count", true, false), ("nullable_col", true, false)], - initial_plan: vec![ - "SortExec: expr=[count@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING], mode=[Sorted]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - expected_plan: vec![ - "SortExec: expr=[count@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING], mode=[Sorted]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet" - ], - }, - // Case 53: - TestCase { - partition_by: false, - window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32)?), WindowFrameBound::CurrentRow)), - func: fn_max_on_unordered.clone(), - required_sort_columns: vec![("nullable_col", true, false), ("max", true, false)], - initial_plan: vec![ - "SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[max: Field { name: \"max\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - expected_plan: vec![ - "SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[max: Field { name: \"max\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - }, - // Case 54: - TestCase { - partition_by: false, - window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32)?), 
WindowFrameBound::CurrentRow)), - func: fn_min_on_unordered.clone(), - required_sort_columns: vec![("min", true, false)], - initial_plan: vec![ - "SortExec: expr=[min@2 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[min: Field { name: \"min\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - expected_plan: vec![ - "SortExec: expr=[min@2 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[min: Field { name: \"min\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - }, - // Case 55: - TestCase { - partition_by: false, - window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32)?), WindowFrameBound::Following(ScalarValue::new_one(&DataType::UInt32)?))), - func: fn_avg_on_unordered.clone(), - required_sort_columns: vec![("nullable_col", true, false)], - initial_plan: vec![ - "SortExec: expr=[nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[avg: Field { name: \"avg\", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING], mode=[Sorted]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - expected_plan: vec![ - "BoundedWindowAggExec: wdw=[avg: Field { name: \"avg\", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING], mode=[Sorted]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - }, - // =============================================REGION ENDS============================================= - // = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = - // ============================================REGION STARTS============================================ - // BoundedWindowAggExec + Sliding(bounded preceding, bounded following) + partition_by + on ordered column - // Case 56: - TestCase { - partition_by: true, - window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32)?), WindowFrameBound::CurrentRow)), - func: fn_count_on_ordered.clone(), - required_sort_columns: vec![("count", true, false), ("nullable_col", true, false)], - initial_plan: vec![ - "SortExec: expr=[count@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, 
non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - expected_plan: vec![ - "BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - }, - // Case 57: - TestCase { - partition_by: true, - window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32)?), WindowFrameBound::Following(ScalarValue::new_one(&DataType::UInt32)?))), - func: fn_max_on_ordered.clone(), - required_sort_columns: vec![("nullable_col", true, false), ("max", true, false)], - initial_plan: vec![ - "SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[max: Field { name: \"max\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING], mode=[Sorted]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - expected_plan: vec![ - "SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[max: Field { name: \"max\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING], mode=[Sorted]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - }, - // Case 58: - TestCase { - partition_by: true, - window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32)?), WindowFrameBound::CurrentRow)), - func: fn_min_on_ordered.clone(), - required_sort_columns: vec![("min", false, false), ("nullable_col", true, false)], - initial_plan: vec![ - "SortExec: expr=[min@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[min: Field { name: \"min\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - expected_plan: vec![ - "SortExec: expr=[min@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[min: Field { name: \"min\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - }, - // Case 59: - TestCase { - partition_by: true, - window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32)?), WindowFrameBound::CurrentRow)), - func: fn_avg_on_ordered.clone(), - 
required_sort_columns: vec![("avg", true, false)], - initial_plan: vec![ - "SortExec: expr=[avg@2 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[avg: Field { name: \"avg\", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - expected_plan: vec![ - "SortExec: expr=[avg@2 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[avg: Field { name: \"avg\", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - }, - // =============================================REGION ENDS============================================= - // = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = - // ============================================REGION STARTS============================================ - // BoundedWindowAggExec + Sliding(bounded preceding, bounded following) + partition_by + on unordered column - // Case 60: - TestCase { - partition_by: true, - window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32)?), WindowFrameBound::CurrentRow)), - func: fn_count_on_unordered.clone(), - required_sort_columns: vec![("nullable_col", true, false), ("count", true, false)], - initial_plan: vec![ - "SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - expected_plan: vec![ - "SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[count: Field { name: \"count\", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - }, - // Case 61: - TestCase { - partition_by: true, - window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32)?), WindowFrameBound::CurrentRow)), - func: fn_max_on_unordered.clone(), - required_sort_columns: vec![("nullable_col", true, false), ("max", true, true)], - initial_plan: vec![ - "SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 ASC], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[max: Field { name: \"max\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted]", - " DataSourceExec: file_groups={1 group: [[x]]}, 
projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - expected_plan: vec![ - "SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 ASC], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[max: Field { name: \"max\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - }, - // Case 62: - TestCase { - partition_by: true, - window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32)?), WindowFrameBound::CurrentRow)), - func: fn_min_on_unordered.clone(), - required_sort_columns: vec![("nullable_col", true, false), ("min", false, false)], - initial_plan: vec![ - "SortExec: expr=[nullable_col@0 ASC NULLS LAST, min@2 DESC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[min: Field { name: \"min\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - expected_plan: vec![ - "SortExec: expr=[nullable_col@0 ASC NULLS LAST, min@2 DESC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[min: Field { name: \"min\", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - }, - // Case 63: - TestCase { - partition_by: true, - window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32)?), WindowFrameBound::CurrentRow)), - func: fn_avg_on_unordered.clone(), - required_sort_columns: vec![("nullable_col", true, false)], - initial_plan: vec![ - "SortExec: expr=[nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]", - " BoundedWindowAggExec: wdw=[avg: Field { name: \"avg\", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - expected_plan: vec![ - "BoundedWindowAggExec: wdw=[avg: Field { name: \"avg\", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet", - ], - }, - // =============================================REGION ENDS============================================= - ]; + // Case 1: + test_window_case!( + partition_by: false, + window_frame: Arc::new(WindowFrame::new(None)), + func: (fn_max_on_ordered.0.clone(), fn_max_on_ordered.1.clone(), fn_max_on_ordered.2.clone()), + required_sort: 
["nullable_col", true, false, "max", false, false], + @ r#" + Input Plan: + SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 DESC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - for (case_idx, case) in test_cases.into_iter().enumerate() { - let partition_by = if case.partition_by { - vec![col("nullable_col", &input_schema)?] - } else { - vec![] - }; - let window_expr = create_window_expr( - &case.func.0, - case.func.1, - &case.func.2, - &partition_by, - &[], - case.window_frame, - Arc::clone(&input_schema), - false, - false, - None, - )?; - let window_exec = if window_expr.uses_bounded_memory() { - Arc::new(BoundedWindowAggExec::try_new( - vec![window_expr], - Arc::clone(&source), - InputOrderMode::Sorted, - case.partition_by, - )?) as Arc - } else { - Arc::new(WindowAggExec::try_new( - vec![window_expr], - Arc::clone(&source), - case.partition_by, - )?) as _ - }; - let output_schema = window_exec.schema(); - let sort_expr = case - .required_sort_columns - .iter() - .map(|(col_name, asc, nf)| { - sort_expr_options( - col_name, - &output_schema, - SortOptions { - descending: !asc, - nulls_first: *nf, - }, - ) - }) - .collect::>(); - let ordering = LexOrdering::new(sort_expr).unwrap(); - let physical_plan = sort_exec(ordering, window_exec); - - assert_optimized!( - case.initial_plan, - case.expected_plan, - physical_plan, - true, - case_idx - ); - } + Optimized Plan: + WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + )?; - Ok(()) -} + // Case 2: + test_window_case!( + partition_by: false, + window_frame: Arc::new(WindowFrame::new(None)), + func: (fn_min_on_ordered.0.clone(), fn_min_on_ordered.1.clone(), fn_min_on_ordered.2.clone()), + required_sort: ["min", false, false, "nullable_col", true, false], + @ r#" + Input Plan: + SortExec: expr=[min@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + + Optimized Plan: + WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + )?; + + // 
+    // Case 3:
+    test_window_case!(
+        partition_by: false,
+        window_frame: Arc::new(WindowFrame::new(None)),
+        func: (fn_avg_on_ordered.0.clone(), fn_avg_on_ordered.1.clone(), fn_avg_on_ordered.2.clone()),
+        required_sort: ["avg", true, false, "nullable_col", true, false],
+        @ r#"
+Input Plan:
+SortExec: expr=[avg@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]
+  WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }]
+    DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet
+
+Optimized Plan:
+WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }]
+  DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet
+"#
+    )?;
+    // =============================================REGION ENDS=============================================
+    // = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
+    // ============================================REGION STARTS============================================
+    // WindowAggExec + Plain(unbounded preceding, unbounded following) + no partition_by + on unordered column
+    // Case 4:
+    test_window_case!(
+        partition_by: false,
+        window_frame: Arc::new(WindowFrame::new(None)),
+        func: (fn_count_on_unordered.0.clone(), fn_count_on_unordered.1.clone(), fn_count_on_unordered.2.clone()),
+        required_sort: ["non_nullable_col", true, false, "count", true, false],
+        @ r#"
+Input Plan:
+SortExec: expr=[non_nullable_col@1 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false]
+  WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }]
+    DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet
+
+Optimized Plan:
+SortExec: expr=[non_nullable_col@1 ASC NULLS LAST], preserve_partitioning=[false]
+  WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }]
+    DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet
+"#
+    )?;
+
+    // Case 5:
+    test_window_case!(
+        partition_by: false,
+        window_frame: Arc::new(WindowFrame::new(None)),
+        func: (fn_max_on_unordered.0.clone(), fn_max_on_unordered.1.clone(), fn_max_on_unordered.2.clone()),
+        required_sort: ["non_nullable_col", false, false, "max", false, false],
+        @ r#"
+Input Plan:
+SortExec: expr=[non_nullable_col@1 DESC NULLS LAST, max@2 DESC NULLS LAST], preserve_partitioning=[false]
+  WindowAggExec: wdw=[max: Ok(Field { name:
"max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +SortExec: expr=[non_nullable_col@1 DESC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + )?; + + // Case 6: + test_window_case!( + partition_by: false, + window_frame: Arc::new(WindowFrame::new(None)), + func: (fn_min_on_unordered.0.clone(), fn_min_on_unordered.1.clone(), fn_min_on_unordered.2.clone()), + required_sort: ["min", true, false, "non_nullable_col", true, false], + @ r#" +Input Plan: +SortExec: expr=[min@2 ASC NULLS LAST, non_nullable_col@1 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +SortExec: expr=[non_nullable_col@1 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + )?; + + // Case 7: + test_window_case!( + partition_by: false, + window_frame: Arc::new(WindowFrame::new(None)), + func: (fn_avg_on_unordered.0.clone(), fn_avg_on_unordered.1.clone(), fn_avg_on_unordered.2.clone()), + required_sort: ["avg", false, false, "nullable_col", false, false], + @ r#" +Input Plan: +SortExec: expr=[avg@2 DESC NULLS LAST, nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, 
projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + )?; + // =============================================REGION ENDS============================================= + // = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = + // ============================================REGION STARTS============================================ + // WindowAggExec + Plain(unbounded preceding, unbounded following) + partition_by + on ordered column + // Case 8: + test_window_case!( + partition_by: true, + window_frame: Arc::new(WindowFrame::new(None)), + func: (fn_count_on_ordered.0.clone(), fn_count_on_ordered.1.clone(), fn_count_on_ordered.2.clone()), + required_sort: ["nullable_col", true, false, "count", true, false], + @ r#" +Input Plan: +SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + )?; + + // Case 9: + test_window_case!( + partition_by: true, + window_frame: Arc::new(WindowFrame::new(None)), + func: (fn_max_on_ordered.0.clone(), fn_max_on_ordered.1.clone(), fn_max_on_ordered.2.clone()), + required_sort: ["nullable_col", true, false, "max", false, false], + @ r#" +Input Plan: +SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 DESC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + )?; + + // Case 10: + test_window_case!( + partition_by: true, + window_frame: Arc::new(WindowFrame::new(None)), + func: (fn_min_on_ordered.0.clone(), fn_min_on_ordered.1.clone(), fn_min_on_ordered.2.clone()), + required_sort: ["min", false, false, "nullable_col", true, false], + @ r#" +Input Plan: +SortExec: expr=[min@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[min: Ok(Field { 
name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +SortExec: expr=[min@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + )?; + // Case 11: + test_window_case!( + partition_by: true, + window_frame: Arc::new(WindowFrame::new(None)), + func: (fn_avg_on_ordered.0.clone(), fn_avg_on_ordered.1.clone(), fn_avg_on_ordered.2.clone()), + required_sort: ["avg", true, false, "nullable_col", true, false], + @ r#" +Input Plan: +SortExec: expr=[avg@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +SortExec: expr=[avg@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + )?; + // =============================================REGION ENDS============================================= + // = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = + // ============================================REGION STARTS============================================ + // WindowAggExec + Plain(unbounded preceding, unbounded following) + partition_by + on unordered column + // Case 12: + test_window_case!( + partition_by: true, + window_frame: Arc::new(WindowFrame::new(None)), + func: (fn_count_on_unordered.0.clone(), fn_count_on_unordered.1.clone(), fn_count_on_unordered.2.clone()), + required_sort: ["non_nullable_col", true, false, "count", true, false], + @ r#" +Input Plan: +SortExec: expr=[non_nullable_col@1 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], 
output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +SortExec: expr=[non_nullable_col@1 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + )?; + + // Case 13: + test_window_case!( + partition_by: true, + window_frame: Arc::new(WindowFrame::new(None)), + func: (fn_max_on_unordered.0.clone(), fn_max_on_unordered.1.clone(), fn_max_on_unordered.2.clone()), + required_sort: ["non_nullable_col", true, false, "max", false, false], + @ r#" +Input Plan: +SortExec: expr=[non_nullable_col@1 ASC NULLS LAST, max@2 DESC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +SortExec: expr=[non_nullable_col@1 ASC NULLS LAST, max@2 DESC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + )?; + + // Case 14: + test_window_case!( + partition_by: true, + window_frame: Arc::new(WindowFrame::new(None)), + func: (fn_min_on_unordered.0.clone(), fn_min_on_unordered.1.clone(), fn_min_on_unordered.2.clone()), + required_sort: ["min", false, false, "non_nullable_col", true, false], + @ r#" +Input Plan: +SortExec: expr=[min@2 DESC NULLS LAST, non_nullable_col@1 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +SortExec: expr=[min@2 DESC NULLS LAST, non_nullable_col@1 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + )?; + + // Case 15: + test_window_case!( + partition_by: true, + window_frame: Arc::new(WindowFrame::new(None)), + func: 
(fn_avg_on_unordered.0.clone(), fn_avg_on_unordered.1.clone(), fn_avg_on_unordered.2.clone()), + required_sort: ["avg", true, false, "nullable_col", true, false], + @ r#" +Input Plan: +SortExec: expr=[avg@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +SortExec: expr=[avg@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + )?; + + // =============================================REGION ENDS============================================= + // = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = + // ============================================REGION STARTS============================================ + // WindowAggExec + Sliding(current row, unbounded following) + no partition_by + on ordered column + // Case 16: + test_window_case!( + partition_by: false, + window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), + func: (fn_count_on_ordered.0.clone(), fn_count_on_ordered.1.clone(), fn_count_on_ordered.2.clone()), + required_sort: ["nullable_col", true, false, "count", false, false], + @ r#" +Input Plan: +SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 DESC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + )?; + + // Case 17: + test_window_case!( + partition_by: false, + window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), + func: (fn_max_on_ordered.0.clone(), fn_max_on_ordered.1.clone(), fn_max_on_ordered.2.clone()), + required_sort: ["max", false, true, "nullable_col", true, false], + @ r#" +Input Plan: +SortExec: expr=[max@2 DESC, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: 
Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + )?; + + // Case 18: + test_window_case!( + partition_by: false, + window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), + func: (fn_min_on_ordered.0.clone(), fn_min_on_ordered.1.clone(), fn_min_on_ordered.2.clone()), + required_sort: ["min", true, true, "nullable_col", true, false], + @ r#" +Input Plan: +SortExec: expr=[min@2 ASC, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + )?; + + // Case 19: + test_window_case!( + partition_by: false, + window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), + func: (fn_avg_on_ordered.0.clone(), fn_avg_on_ordered.1.clone(), fn_avg_on_ordered.2.clone()), + required_sort: ["avg", false, false, "nullable_col", true, false], + @ r#" +Input Plan: +SortExec: expr=[avg@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +SortExec: expr=[avg@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + )?; + // =============================================REGION ENDS============================================= + // = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = + // 
============================================REGION STARTS============================================ + // WindowAggExec + Sliding(current row, unbounded following) + no partition_by + on unordered column + // Case 20: + test_window_case!( + partition_by: false, + window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), + func: (fn_count_on_unordered.0.clone(), fn_count_on_unordered.1.clone(), fn_count_on_unordered.2.clone()), + required_sort: ["nullable_col", true, false, "count", true, false], + @ r#" +Input Plan: +SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + )?; + + // Case 21: + test_window_case!( + partition_by: false, + window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), + func: (fn_max_on_unordered.0.clone(), fn_max_on_unordered.1.clone(), fn_max_on_unordered.2.clone()), + required_sort: ["nullable_col", true, false, "max", false, true], + @ r#" +Input Plan: +SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 DESC], preserve_partitioning=[false] + WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + )?; + + // Case 22: + test_window_case!( + partition_by: false, + window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), + func: (fn_min_on_unordered.0.clone(), fn_min_on_unordered.1.clone(), fn_min_on_unordered.2.clone()), + required_sort: ["min", true, false, "nullable_col", true, false], + @ r#" +Input Plan: +SortExec: expr=[min@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: 
file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +SortExec: expr=[min@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + )?; + + // Case 23: + test_window_case!( + partition_by: false, + window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), + func: (fn_avg_on_unordered.0.clone(), fn_avg_on_unordered.1.clone(), fn_avg_on_unordered.2.clone()), + required_sort: ["avg", false, false, "nullable_col", true, false], + @ r#" +Input Plan: +SortExec: expr=[avg@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +SortExec: expr=[avg@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + )?; + // =============================================REGION ENDS============================================= + // = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = + // ============================================REGION STARTS============================================ + // WindowAggExec + Sliding(current row, unbounded following) + partition_by + on ordered column + // Case 24: + test_window_case!( + partition_by: true, + window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), + func: (fn_count_on_ordered.0.clone(), fn_count_on_ordered.1.clone(), fn_count_on_ordered.2.clone()), + required_sort: ["nullable_col", true, false, "count", false, false], + @ r#" +Input Plan: +SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 DESC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, 
end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + )?; + + // Case 25: + test_window_case!( + partition_by: true, + window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), + func: (fn_max_on_ordered.0.clone(), fn_max_on_ordered.1.clone(), fn_max_on_ordered.2.clone()), + required_sort: ["nullable_col", true, false, "max", true, false], + @ r#" +Input Plan: +SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + )?; + + // Case 26: + test_window_case!( + partition_by: true, + window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), + func: (fn_min_on_ordered.0.clone(), fn_min_on_ordered.1.clone(), fn_min_on_ordered.2.clone()), + required_sort: ["min", false, false], + @ r#" +Input Plan: +SortExec: expr=[min@2 DESC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +SortExec: expr=[min@2 DESC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + )?; + + // Case 27: + test_window_case!( + partition_by: true, + window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), + func: (fn_avg_on_ordered.0.clone(), fn_avg_on_ordered.1.clone(), fn_avg_on_ordered.2.clone()), + required_sort: ["avg", false, false], + @ r#" +Input Plan: +SortExec: expr=[avg@2 DESC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, 
projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +SortExec: expr=[avg@2 DESC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + )?; + // =============================================REGION ENDS============================================= + // = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = + // ============================================REGION STARTS============================================ + // WindowAggExec + Sliding(current row, unbounded following) + partition_by + on unordered column + // Case 28: + test_window_case!( + partition_by: true, + window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), + func: (fn_count_on_unordered.0.clone(), fn_count_on_unordered.1.clone(), fn_count_on_unordered.2.clone()), + required_sort: ["count", false, false, "nullable_col", true, false], + @ r#" +Input Plan: +SortExec: expr=[count@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +SortExec: expr=[count@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + )?; + + // Case 29: + test_window_case!( + partition_by: true, + window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), + func: (fn_max_on_unordered.0.clone(), fn_max_on_unordered.1.clone(), fn_max_on_unordered.2.clone()), + required_sort: ["nullable_col", true, false, "max", false, true], + @ r#" +Input Plan: +SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 DESC], preserve_partitioning=[false] + WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: 
file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + )?; + + // Case 30: + test_window_case!( + partition_by: true, + window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), + func: (fn_min_on_unordered.0.clone(), fn_min_on_unordered.1.clone(), fn_min_on_unordered.2.clone()), + required_sort: ["min", false, false], + @ r#" +Input Plan: +SortExec: expr=[min@2 DESC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +SortExec: expr=[min@2 DESC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + )?; + + // Case 31: + test_window_case!( + partition_by: true, + window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), + func: (fn_avg_on_unordered.0.clone(), fn_avg_on_unordered.1.clone(), fn_avg_on_unordered.2.clone()), + required_sort: ["nullable_col", true, false, "avg", true, false], + @ r#" +Input Plan: +SortExec: expr=[nullable_col@0 ASC NULLS LAST, avg@2 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +SortExec: expr=[nullable_col@0 ASC NULLS LAST, avg@2 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + )?; + + // =============================================REGION ENDS============================================= + // = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = + // ============================================REGION STARTS============================================ + // BoundedWindowAggExec + Plain(unbounded preceding, unbounded following) + no partition_by + on ordered column + // Case 32: + test_window_case!( + partition_by: false, + window_frame: Arc::new(WindowFrame::new(Some(true))), + func: (fn_count_on_ordered.0.clone(), fn_count_on_ordered.1.clone(), fn_count_on_ordered.2.clone()), + required_sort: ["nullable_col", true, false, "count", true, false], + @ r#" +Input 
Plan: +SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + )?; + + // Case 33: + test_window_case!( + partition_by: false, + window_frame: Arc::new(WindowFrame::new(Some(true))), + func: (fn_max_on_ordered.0.clone(), fn_max_on_ordered.1.clone(), fn_max_on_ordered.2.clone()), + required_sort: ["max", false, false, "nullable_col", true, false], + @ r#" +Input Plan: +SortExec: expr=[max@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[max: Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +SortExec: expr=[max@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[max: Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + )?; + + // Case 34: + test_window_case!( + partition_by: false, + window_frame: Arc::new(WindowFrame::new(Some(true))), + func: (fn_min_on_ordered.0.clone(), fn_min_on_ordered.1.clone(), fn_min_on_ordered.2.clone()), + required_sort: ["min", false, false, "nullable_col", true, false], + @ r#" +Input Plan: +SortExec: expr=[min@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[min: Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +BoundedWindowAggExec: wdw=[min: Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + )?; + + // Case 35: + test_window_case!( + partition_by: false, + window_frame: Arc::new(WindowFrame::new(Some(true))), + func: (fn_avg_on_ordered.0.clone(), fn_avg_on_ordered.1.clone(), 
fn_avg_on_ordered.2.clone()), + required_sort: ["nullable_col", true, false, "avg", true, false], + @ r#" +Input Plan: +SortExec: expr=[nullable_col@0 ASC NULLS LAST, avg@2 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[avg: Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +SortExec: expr=[nullable_col@0 ASC NULLS LAST, avg@2 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[avg: Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + )?; + // =============================================REGION ENDS============================================= + // = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = + // ============================================REGION STARTS============================================ + // BoundedWindowAggExec + Plain(unbounded preceding, unbounded following) + no partition_by + on unordered column + // Case 36: + test_window_case!( + partition_by: false, + window_frame: Arc::new(WindowFrame::new(Some(true))), + func: (fn_count_on_unordered.0.clone(), fn_count_on_unordered.1.clone(), fn_count_on_unordered.2.clone()), + required_sort: ["nullable_col", true, false, "count", true, true], + @ r#" +Input Plan: +SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 ASC], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + )?; + + // Case 37: + test_window_case!( + partition_by: false, + window_frame: Arc::new(WindowFrame::new(Some(true))), + func: (fn_max_on_unordered.0.clone(), fn_max_on_unordered.1.clone(), fn_max_on_unordered.2.clone()), + required_sort: ["max", true, false, "nullable_col", true, false], + @ r#" +Input Plan: +SortExec: expr=[max@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[max: Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +BoundedWindowAggExec: 
wdw=[max: Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + )?; + + // Case 38: + test_window_case!( + partition_by: false, + window_frame: Arc::new(WindowFrame::new(Some(true))), + func: (fn_min_on_unordered.0.clone(), fn_min_on_unordered.1.clone(), fn_min_on_unordered.2.clone()), + required_sort: ["min", false, true, "nullable_col", true, false], + @ r#" +Input Plan: +SortExec: expr=[min@2 DESC, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[min: Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +SortExec: expr=[min@2 DESC, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[min: Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + )?; + + // Case 39: + test_window_case!( + partition_by: false, + window_frame: Arc::new(WindowFrame::new(Some(true))), + func: (fn_avg_on_unordered.0.clone(), fn_avg_on_unordered.1.clone(), fn_avg_on_unordered.2.clone()), + required_sort: ["avg", true, false], + @ r#" +Input Plan: +SortExec: expr=[avg@2 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[avg: Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +SortExec: expr=[avg@2 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[avg: Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + )?; + // =============================================REGION ENDS============================================= + // = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = + // ============================================REGION STARTS============================================ + // BoundedWindowAggExec + Plain(unbounded preceding, unbounded following) + partition_by + on ordered column + // Case 40: + test_window_case!( + partition_by: true, + window_frame: Arc::new(WindowFrame::new(Some(true))), + func: (fn_count_on_ordered.0.clone(), fn_count_on_ordered.1.clone(), fn_count_on_ordered.2.clone()), + required_sort: ["nullable_col", true, false, "count", true, false], + @ r#" +Input 
Plan: +SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + )?; + + // Case 41: + test_window_case!( + partition_by: true, + window_frame: Arc::new(WindowFrame::new(Some(true))), + func: (fn_max_on_ordered.0.clone(), fn_max_on_ordered.1.clone(), fn_max_on_ordered.2.clone()), + required_sort: ["max", true, false, "nullable_col", true, false], + @ r#" +Input Plan: +SortExec: expr=[max@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[max: Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +SortExec: expr=[max@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[max: Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + )?; + + // Case 42: + test_window_case!( + partition_by: true, + window_frame: Arc::new(WindowFrame::new(Some(true))), + func: (fn_min_on_ordered.0.clone(), fn_min_on_ordered.1.clone(), fn_min_on_ordered.2.clone()), + required_sort: ["min", false, false, "nullable_col", true, false], + @ r#" +Input Plan: +SortExec: expr=[min@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[min: Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +SortExec: expr=[min@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[min: Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + )?; + + // Case 43: + test_window_case!( + partition_by: true, + window_frame: 
Arc::new(WindowFrame::new(Some(true))), + func: (fn_avg_on_ordered.0.clone(), fn_avg_on_ordered.1.clone(), fn_avg_on_ordered.2.clone()), + required_sort: ["nullable_col", true, false, "avg", true, false], + @ r#" +Input Plan: +SortExec: expr=[nullable_col@0 ASC NULLS LAST, avg@2 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[avg: Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +SortExec: expr=[nullable_col@0 ASC NULLS LAST, avg@2 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[avg: Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + )?; + + // =============================================REGION ENDS============================================= + // = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = + // ============================================REGION STARTS============================================ + // BoundedWindowAggExec + Plain(unbounded preceding, unbounded following) + partition_by + on unordered column + // Case 44: + test_window_case!( + partition_by: true, + window_frame: Arc::new(WindowFrame::new(Some(true))), + func: (fn_count_on_unordered.0.clone(), fn_count_on_unordered.1.clone(), fn_count_on_unordered.2.clone()), + required_sort: ["count", true, true], + @ r#" +Input Plan: +SortExec: expr=[count@2 ASC], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +SortExec: expr=[count@2 ASC], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + )?; + + // Case 45: + test_window_case!( + partition_by: true, + window_frame: Arc::new(WindowFrame::new(Some(true))), + func: (fn_max_on_unordered.0.clone(), fn_max_on_unordered.1.clone(), fn_max_on_unordered.2.clone()), + required_sort: ["nullable_col", true, false, "max", false, false], + @ r#" +Input Plan: +SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 DESC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[max: Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], 
output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 DESC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[max: Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + )?; + + // Case 46: + test_window_case!( + partition_by: true, + window_frame: Arc::new(WindowFrame::new(Some(true))), + func: (fn_min_on_unordered.0.clone(), fn_min_on_unordered.1.clone(), fn_min_on_unordered.2.clone()), + required_sort: ["nullable_col", true, false, "min", false, false], + @ r#" +Input Plan: +SortExec: expr=[nullable_col@0 ASC NULLS LAST, min@2 DESC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[min: Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +BoundedWindowAggExec: wdw=[min: Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + )?; + + // Case 47: + test_window_case!( + partition_by: true, + window_frame: Arc::new(WindowFrame::new(Some(true))), + func: (fn_avg_on_unordered.0.clone(), fn_avg_on_unordered.1.clone(), fn_avg_on_unordered.2.clone()), + required_sort: ["nullable_col", true, false], + @ r#" +Input Plan: +SortExec: expr=[nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[avg: Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +BoundedWindowAggExec: wdw=[avg: Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + )?; + + // =============================================REGION ENDS============================================= + // = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = + // ============================================REGION STARTS============================================ + // BoundedWindowAggExec + Sliding(bounded preceding, bounded following) + no partition_by + on ordered column + // Case 48: + test_window_case!( + partition_by: false, + window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32)?), 
WindowFrameBound::CurrentRow)), + func: (fn_count_on_ordered.0.clone(), fn_count_on_ordered.1.clone(), fn_count_on_ordered.2.clone()), + required_sort: ["count", true, false, "nullable_col", true, false], + @ r#" +Input Plan: +SortExec: expr=[count@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + )?; + + // Case 49: + test_window_case!( + partition_by: false, + window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32)?), WindowFrameBound::Following(ScalarValue::new_one(&DataType::UInt32)?))), + func: (fn_max_on_ordered.0.clone(), fn_max_on_ordered.1.clone(), fn_max_on_ordered.2.clone()), + required_sort: ["max", true, false], + @ r#" +Input Plan: +SortExec: expr=[max@2 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[max: Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +SortExec: expr=[max@2 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[max: Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + )?; + + // Case 50: + test_window_case!( + partition_by: false, + window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32)?), WindowFrameBound::CurrentRow)), + func: (fn_min_on_ordered.0.clone(), fn_min_on_ordered.1.clone(), fn_min_on_ordered.2.clone()), + required_sort: ["nullable_col", true, false, "min", false, false], + @ r#" +Input Plan: +SortExec: expr=[nullable_col@0 ASC NULLS LAST, min@2 DESC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[min: Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +BoundedWindowAggExec: wdw=[min: Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] + 
DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + )?; + + // Case 51: + test_window_case!( + partition_by: false, + window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32)?), WindowFrameBound::CurrentRow)), + func: (fn_avg_on_ordered.0.clone(), fn_avg_on_ordered.1.clone(), fn_avg_on_ordered.2.clone()), + required_sort: ["avg", true, false], + @ r#" +Input Plan: +SortExec: expr=[avg@2 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[avg: Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +SortExec: expr=[avg@2 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[avg: Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + )?; + + // =============================================REGION ENDS============================================= + // = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = + // ============================================REGION STARTS============================================ + // BoundedWindowAggExec + Sliding(bounded preceding, bounded following) + no partition_by + on unordered column + // Case 52: + test_window_case!( + partition_by: false, + window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32)?), WindowFrameBound::Following(ScalarValue::new_one(&DataType::UInt32)?))), + func: (fn_count_on_unordered.0.clone(), fn_count_on_unordered.1.clone(), fn_count_on_unordered.2.clone()), + required_sort: ["count", true, false, "nullable_col", true, false], + @ r#" +Input Plan: +SortExec: expr=[count@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +SortExec: expr=[count@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + )?; + + // Case 53: + test_window_case!( + partition_by: false, + window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32)?), 
WindowFrameBound::CurrentRow)), + func: (fn_max_on_unordered.0.clone(), fn_max_on_unordered.1.clone(), fn_max_on_unordered.2.clone()), + required_sort: ["nullable_col", true, false, "max", true, false], + @ r#" +Input Plan: +SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[max: Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[max: Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + )?; + + // Case 54: + test_window_case!( + partition_by: false, + window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32)?), WindowFrameBound::CurrentRow)), + func: (fn_min_on_unordered.0.clone(), fn_min_on_unordered.1.clone(), fn_min_on_unordered.2.clone()), + required_sort: ["min", true, false], + @ r#" +Input Plan: +SortExec: expr=[min@2 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[min: Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +SortExec: expr=[min@2 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[min: Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + )?; + + // Case 55: + test_window_case!( + partition_by: false, + window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32)?), WindowFrameBound::Following(ScalarValue::new_one(&DataType::UInt32)?))), + func: (fn_avg_on_unordered.0.clone(), fn_avg_on_unordered.1.clone(), fn_avg_on_unordered.2.clone()), + required_sort: ["nullable_col", true, false], + @ r#" +Input Plan: +SortExec: expr=[nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[avg: Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +BoundedWindowAggExec: wdw=[avg: Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS 
BETWEEN 1 PRECEDING AND 1 FOLLOWING], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + )?; + + // =============================================REGION ENDS============================================= + // = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = + // ============================================REGION STARTS============================================ + // BoundedWindowAggExec + Sliding(bounded preceding, bounded following) + partition_by + on ordered column + // Case 56: + test_window_case!( + partition_by: true, + window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32)?), WindowFrameBound::CurrentRow)), + func: (fn_count_on_ordered.0.clone(), fn_count_on_ordered.1.clone(), fn_count_on_ordered.2.clone()), + required_sort: ["count", true, false, "nullable_col", true, false], + @ r#" +Input Plan: +SortExec: expr=[count@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + )?; + + // Case 57: + test_window_case!( + partition_by: true, + window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32)?), WindowFrameBound::Following(ScalarValue::new_one(&DataType::UInt32)?))), + func: (fn_max_on_ordered.0.clone(), fn_max_on_ordered.1.clone(), fn_max_on_ordered.2.clone()), + required_sort: ["nullable_col", true, false, "max", true, false], + @ r#" +Input Plan: +SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[max: Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[max: Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + )?; + + // Case 58: + test_window_case!( + partition_by: true, + window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, 
WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32)?), WindowFrameBound::CurrentRow)), + func: (fn_min_on_ordered.0.clone(), fn_min_on_ordered.1.clone(), fn_min_on_ordered.2.clone()), + required_sort: ["min", false, false, "nullable_col", true, false], + @ r#" +Input Plan: +SortExec: expr=[min@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[min: Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +SortExec: expr=[min@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[min: Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + )?; + + // Case 59: + test_window_case!( + partition_by: true, + window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32)?), WindowFrameBound::CurrentRow)), + func: (fn_avg_on_ordered.0.clone(), fn_avg_on_ordered.1.clone(), fn_avg_on_ordered.2.clone()), + required_sort: ["avg", true, false], + @ r#" +Input Plan: +SortExec: expr=[avg@2 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[avg: Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +SortExec: expr=[avg@2 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[avg: Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + )?; + // =============================================REGION ENDS============================================= + // = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = + // ============================================REGION STARTS============================================ + // BoundedWindowAggExec + Sliding(bounded preceding, bounded following) + partition_by + on unordered column + // Case 60: + test_window_case!( + partition_by: true, + window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32)?), WindowFrameBound::CurrentRow)), + func: (fn_count_on_unordered.0.clone(), fn_count_on_unordered.1.clone(), fn_count_on_unordered.2.clone()), + required_sort: ["nullable_col", true, false, "count", true, false], + @ r#" +Input Plan: +SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[count: 
Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + )?; + + // Case 61: + test_window_case!( + partition_by: true, + window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32)?), WindowFrameBound::CurrentRow)), + func: (fn_max_on_unordered.0.clone(), fn_max_on_unordered.1.clone(), fn_max_on_unordered.2.clone()), + required_sort: ["nullable_col", true, false, "max", true, true], + @ r#" +Input Plan: +SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 ASC], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[max: Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 ASC], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[max: Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + )?; + + // Case 62: + test_window_case!( + partition_by: true, + window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32)?), WindowFrameBound::CurrentRow)), + func: (fn_min_on_unordered.0.clone(), fn_min_on_unordered.1.clone(), fn_min_on_unordered.2.clone()), + required_sort: ["nullable_col", true, false, "min", false, false], + @ r#" +Input Plan: +SortExec: expr=[nullable_col@0 ASC NULLS LAST, min@2 DESC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[min: Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +SortExec: expr=[nullable_col@0 ASC NULLS LAST, min@2 DESC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[min: Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC 
NULLS LAST], file_type=parquet
+"#
+    )?;
+
+    // Case 63:
+    test_window_case!(
+        partition_by: true,
+        window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32)?), WindowFrameBound::CurrentRow)),
+        func: (fn_avg_on_unordered.0.clone(), fn_avg_on_unordered.1.clone(), fn_avg_on_unordered.2.clone()),
+        required_sort: ["nullable_col", true, false],
+        @ r#"
+Input Plan:
+SortExec: expr=[nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]
+  BoundedWindowAggExec: wdw=[avg: Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted]
+    DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet
+
+Optimized Plan:
+BoundedWindowAggExec: wdw=[avg: Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted]
+  DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet
+"#
+    )?;
+    // =============================================REGION ENDS=============================================
+
+    Ok(())
+}
 
 #[test]
 fn test_removes_unused_orthogonal_sort() -> Result<()> {
     let schema = create_test_schema3()?;
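A note on reading the cases above: every `required_sort` is a flat list of (column, ascending, nulls_first) triples, so `["min", false, false]` asks for `min DESC NULLS LAST` and `["max", false, true]` asks for `max DESC`. The sketch below shows how such triples could be lowered into physical sort expressions. It is a minimal illustration only; the helper name `sort_exprs_from_spec` is an assumption and is not the actual `test_window_case!` expansion.

use arrow_schema::{Schema, SortOptions};
use datafusion_common::Result;
use datafusion_physical_expr::{expressions::col, PhysicalSortExpr};

/// Lower (column, ascending, nulls_first) triples into sort expressions.
/// Hypothetical helper, shown only to document the triple convention.
fn sort_exprs_from_spec(
    spec: &[(&str, bool, bool)],
    schema: &Schema,
) -> Result<Vec<PhysicalSortExpr>> {
    spec.iter()
        .map(|&(name, ascending, nulls_first)| {
            Ok(PhysicalSortExpr {
                // `col` resolves the column index against the schema,
                // which is why plans print e.g. `min@2 DESC NULLS LAST`.
                expr: col(name, schema)?,
                options: SortOptions {
                    descending: !ascending,
                    nulls_first,
                },
            })
        })
        .collect()
}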
From 6d6e8b77171e0778063d59cf4b8e69deee8356b4 Mon Sep 17 00:00:00 2001
From: blaginin
Date: Sat, 4 Oct 2025 20:49:12 +0100
Subject: [PATCH 10/14] Clippy

---
 datafusion/core/tests/physical_optimizer/enforce_sorting.rs | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/datafusion/core/tests/physical_optimizer/enforce_sorting.rs b/datafusion/core/tests/physical_optimizer/enforce_sorting.rs
index d058395ff9850..47fc78196b7c6 100644
--- a/datafusion/core/tests/physical_optimizer/enforce_sorting.rs
+++ b/datafusion/core/tests/physical_optimizer/enforce_sorting.rs
@@ -71,7 +71,6 @@ use datafusion_catalog::streaming::StreamingTable;
 
 use futures::StreamExt;
 use insta::{assert_snapshot, Settings};
-use rstest::rstest;
 
 /// Create a sorted Csv exec
 fn csv_exec_sorted(
@@ -1295,7 +1294,7 @@ async fn test_sort_merge_join_order_by_left() -> Result<()> {
 
     settings.add_filter(
         // join_type={} replace with join_type=... to avoid snapshot name issue
-        format!("join_type={}", join_type).as_str(),
+        format!("join_type={join_type}").as_str(),
         "join_type=...",
     );
@@ -1388,7 +1387,7 @@ async fn test_sort_merge_join_order_by_right() -> Result<()> {
 
     settings.add_filter(
         // join_type={} replace with join_type=... to avoid snapshot name issue
-        format!("join_type={}", join_type).as_str(),
+        format!("join_type={join_type}").as_str(),
         "join_type=...",
     );

From 0fd01c7a5de7eb96dcb0e365cbdd1165cc2535fe Mon Sep 17 00:00:00 2001
From: blaginin
Date: Sat, 4 Oct 2025 21:26:23 +0100
Subject: [PATCH 11/14] Kill `expect_no_change` (we now have `Input / Optimized
 Plan` unification)

---
 .../physical_optimizer/enforce_sorting.rs     | 78 ++++---------------
 1 file changed, 17 insertions(+), 61 deletions(-)

diff --git a/datafusion/core/tests/physical_optimizer/enforce_sorting.rs b/datafusion/core/tests/physical_optimizer/enforce_sorting.rs
index 47fc78196b7c6..5bba62160634a 100644
--- a/datafusion/core/tests/physical_optimizer/enforce_sorting.rs
+++ b/datafusion/core/tests/physical_optimizer/enforce_sorting.rs
@@ -96,8 +96,6 @@ fn csv_exec_sorted(
 struct EnforceSortingTest {
     plan: Arc<dyn ExecutionPlan>,
     repartition_sorts: bool,
-    /// If true, asserts that the input and optimized plans are the same
-    expect_no_change: bool,
     /// A message printed into the snapshot to describe the expected output
     expected_description: Option<String>,
 }
@@ -107,7 +105,6 @@ impl EnforceSortingTest {
         Self {
             plan,
             repartition_sorts: false,
-            expect_no_change: false,
             expected_description: None,
         }
     }
@@ -118,11 +115,6 @@ impl EnforceSortingTest {
         self
     }
 
-    /// Set whether to expect no change in the plan
-    fn with_expect_no_change(mut self, expect_no_change: bool) -> Self {
-        self.expect_no_change = expect_no_change;
-        self
-    }
 
     /// Add an expected output description
     fn with_expected_description(mut self, description: &str) -> Self {
         self.expected_description = Some(description.to_string());
         self
     }
@@ -209,11 +201,7 @@ impl EnforceSortingTest {
             "".to_string()
         };
 
-        if self.expect_no_change {
-            assert_eq!(input_plan_string, optimized_plan_string,
-                "Expected no change in the plan, but the optimized plan differs from the input plan"
-            );
-
+        if input_plan_string == optimized_plan_string{
             return format!(
                 "{expected_description}Input / Optimized Plan:\n{input_plan_string}",
             );
         }
@@ -314,8 +302,7 @@ async fn test_union_inputs_sorted() -> Result<()> {
     // one input to the union is already sorted, one is not.
     let test = EnforceSortingTest::new(physical_plan)
         .with_repartition_sorts(true)
-        .with_expected_description("// should not add a sort at the output of the union, input plan should not be changed")
-        .with_expect_no_change(true);
+        .with_expected_description("// should not add a sort at the output of the union, input plan should not be changed");
 
     assert_snapshot!(test.run(), @r"
     // should not add a sort at the output of the union, input plan should not be changed
     Input / Optimized Plan:
@@ -348,8 +335,7 @@ async fn test_union_inputs_different_sorted() -> Result<()> {
     // one input to the union is already sorted, one is not.
     let test = EnforceSortingTest::new(physical_plan)
         .with_repartition_sorts(true)
-        .with_expected_description("// should not add a sort at the output of the union, input plan should not be changed")
-        .with_expect_no_change(true);
+        .with_expected_description("// should not add a sort at the output of the union, input plan should not be changed");
     assert_snapshot!(test.run(), @r"
     // should not add a sort at the output of the union, input plan should not be changed
     Input / Optimized Plan:
@@ -1640,14 +1626,7 @@ async fn test_with_lost_ordering_bounded() -> Result<()> {
         EnforceSortingTest::new(physical_plan.clone()).with_repartition_sorts(false);
 
     assert_snapshot!(test_no_repartition_sorts.run(), @r"
-    Input Plan:
-    SortExec: expr=[a@0 ASC], preserve_partitioning=[false]
-      CoalescePartitionsExec
-        RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10
-          RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1
-            DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], file_type=csv, has_header=false
-
-    Optimized Plan:
+    Input / Optimized Plan:
     SortExec: expr=[a@0 ASC], preserve_partitioning=[false]
       CoalescePartitionsExec
         RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10
@@ -1689,13 +1668,7 @@ async fn test_do_not_pushdown_through_spm() -> Result<()> {
     let test =
         EnforceSortingTest::new(physical_plan.clone()).with_repartition_sorts(true);
     assert_snapshot!(test.run(), @r"
-    Input Plan:
-    SortExec: expr=[b@1 ASC], preserve_partitioning=[false]
-      SortPreservingMergeExec: [a@0 ASC, b@1 ASC]
-        RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1
-          DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC, b@1 ASC], file_type=csv, has_header=false
-
-    Optimized Plan:
+    Input / Optimized Plan:
     SortExec: expr=[b@1 ASC], preserve_partitioning=[false]
       SortPreservingMergeExec: [a@0 ASC, b@1 ASC]
         RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1
@@ -1787,8 +1760,8 @@ async fn test_not_replaced_with_partial_sort_for_bounded_input() -> Result<()> {
         parquet_input,
     );
     let test = EnforceSortingTest::new(physical_plan.clone())
-        .with_repartition_sorts(false)
-        .with_expect_no_change(true);
+        .with_repartition_sorts(false);
+
     assert_snapshot!(test.run(), @r"
     Input / Optimized Plan:
     SortExec: expr=[a@0 ASC, b@1 ASC, c@2 ASC], preserve_partitioning=[false]
@@ -2179,13 +2152,7 @@ async fn test_do_not_pushdown_through_limit() -> Result<()> {
     let test =
         EnforceSortingTest::new(physical_plan.clone()).with_repartition_sorts(true);
     assert_snapshot!(test.run(), @r"
-    Input Plan:
-    SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]
-      GlobalLimitExec: skip=0, fetch=5
-        SortExec: expr=[non_nullable_col@1 ASC], preserve_partitioning=[false]
-          DataSourceExec: partitions=1, partition_sizes=[0]
-
-    Optimized Plan:
+    Input / Optimized Plan:
     SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]
       GlobalLimitExec: skip=0, fetch=5
         SortExec: expr=[non_nullable_col@1 ASC], preserve_partitioning=[false]
@@ -2465,9 +2432,9 @@ async fn test_push_with_required_input_ordering_prohibited() -> Result<()> {
         .into_arc();
     let plan = sort_exec(ordering_b, plan);
     let test = EnforceSortingTest::new(plan.clone())
-        .with_repartition_sorts(true)
+        .with_repartition_sorts(true);
         // should not be able to push sorts
-        .with_expect_no_change(true);
+
     assert_snapshot!(test.run(), @r"
     Input / Optimized Plan:
     SortExec: expr=[b@1 ASC], preserve_partitioning=[false]
@@ -2558,8 +2525,7 @@ async fn test_not_replaced_with_partial_sort_for_unbounded_input() -> Result<()>
         unbounded_input,
     );
     let test = EnforceSortingTest::new(physical_plan.clone())
-        .with_repartition_sorts(true)
-        .with_expect_no_change(true);
+        .with_repartition_sorts(true);
     assert_snapshot!(test.run(), @r"
     Input / Optimized Plan:
     SortExec: expr=[a@0 ASC, b@1 ASC, c@2 ASC], preserve_partitioning=[false]
@@ -2895,16 +2861,11 @@ WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: tru
         func: (fn_min_on_ordered.0.clone(), fn_min_on_ordered.1.clone(), fn_min_on_ordered.2.clone()),
         required_sort: ["min", false, false, "nullable_col", true, false],
         @ r#"
-Input Plan:
-SortExec: expr=[min@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]
-  WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }]
-    DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet
-
-Optimized Plan:
-SortExec: expr=[min@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]
-  WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }]
-    DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet
-"#
+    Input / Optimized Plan:
+    SortExec: expr=[min@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]
+      WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }]
+        DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet
+    "#
     )?;
 
     // Case 11:
@@ -3991,13 +3952,8 @@ fn test_keeps_used_orthogonal_sort() -> Result<()> {
         "// Test: should keep the orthogonal sort, since it modifies the output:",
     );
     assert_snapshot!(test.run(), @r"
-    Input Plan:
-    SortExec: expr=[b@1 ASC, c@2 ASC], preserve_partitioning=[false]
-      SortExec: TopK(fetch=3), expr=[a@0 ASC], preserve_partitioning=[false]
-        StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[b@1 ASC, c@2 ASC]
-
     // Test: should keep the orthogonal sort, since it modifies the output:
-    Optimized Plan:
+    Input / Optimized Plan:
     SortExec: expr=[b@1 ASC, c@2 ASC], preserve_partitioning=[false]
       SortExec: TopK(fetch=3), expr=[a@0 ASC], preserve_partitioning=[false]
         StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[b@1 ASC, c@2 ASC]

From d1e337d72d893b33392024dcaf3b22ca02e66d6b Mon Sep 17 00:00:00 2001
From: blaginin
Date: Sat, 4 Oct 2025 21:28:09 +0100
Subject: [PATCH 12/14] Fmt

---
 .../tests/physical_optimizer/enforce_sorting.rs | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/datafusion/core/tests/physical_optimizer/enforce_sorting.rs b/datafusion/core/tests/physical_optimizer/enforce_sorting.rs
index 5bba62160634a..d97affac95dc1 100644
--- a/datafusion/core/tests/physical_optimizer/enforce_sorting.rs
+++ b/datafusion/core/tests/physical_optimizer/enforce_sorting.rs
@@ -115,7 +115,6 @@ impl EnforceSortingTest {
         self
     }
 
-
     /// Add an expected output description
     fn with_expected_description(mut self, description: &str) -> Self {
         self.expected_description = Some(description.to_string());
@@ -201,7 +200,7 @@ impl EnforceSortingTest {
             "".to_string()
         };
 
-        if input_plan_string == optimized_plan_string{
+        if input_plan_string == optimized_plan_string {
             return format!(
                 "{expected_description}Input / Optimized Plan:\n{input_plan_string}",
             );
@@ -1759,8 +1758,8 @@ async fn test_not_replaced_with_partial_sort_for_bounded_input() -> Result<()> {
         .into(),
         parquet_input,
     );
-    let test = EnforceSortingTest::new(physical_plan.clone())
-        .with_repartition_sorts(false);
+    let test =
+        EnforceSortingTest::new(physical_plan.clone()).with_repartition_sorts(false);
 
     assert_snapshot!(test.run(), @r"
     Input / Optimized Plan:
@@ -2431,9 +2430,8 @@ async fn test_push_with_required_input_ordering_prohibited() -> Result<()> {
         .with_maintains_input_order(true)
         .into_arc();
     let plan = sort_exec(ordering_b, plan);
-    let test = EnforceSortingTest::new(plan.clone())
-        .with_repartition_sorts(true);
-        // should not be able to push sorts
+    let test = EnforceSortingTest::new(plan.clone()).with_repartition_sorts(true);
+    // should not be able to push sorts
 
     assert_snapshot!(test.run(), @r"
     Input / Optimized Plan:
@@ -2524,8 +2522,8 @@ async fn test_not_replaced_with_partial_sort_for_unbounded_input() -> Result<()>
         .into(),
         unbounded_input,
     );
-    let test = EnforceSortingTest::new(physical_plan.clone())
-        .with_repartition_sorts(true);
+    let test =
+        EnforceSortingTest::new(physical_plan.clone()).with_repartition_sorts(true);
     assert_snapshot!(test.run(), @r"
     Input / Optimized Plan:
     SortExec: expr=[a@0 ASC, b@1 ASC, c@2 ASC], preserve_partitioning=[false]

From 4ac1e56c63692ae22e5f853027c45eabd2c01de2 Mon Sep 17 00:00:00 2001
From: blaginin
Date: Sat, 4 Oct 2025 23:46:52 +0100
Subject: [PATCH 13/14] Merge more plans with the same before/after output

---
 .../physical_optimizer/enforce_sorting.rs     | 525 ++++++------------
 1 file changed, 175 insertions(+), 350 deletions(-)

diff --git a/datafusion/core/tests/physical_optimizer/enforce_sorting.rs b/datafusion/core/tests/physical_optimizer/enforce_sorting.rs
index d97affac95dc1..f6bd27f363282 100644
--- a/datafusion/core/tests/physical_optimizer/enforce_sorting.rs
+++ b/datafusion/core/tests/physical_optimizer/enforce_sorting.rs
@@ -2873,16 +2873,11 @@ WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: tru
         func: (fn_avg_on_ordered.0.clone(), fn_avg_on_ordered.1.clone(), fn_avg_on_ordered.2.clone()),
         required_sort: ["avg", true, false, "nullable_col", true, false],
         @ r#"
-Input Plan:
-SortExec: expr=[avg@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]
-  WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }]
-    DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet
-
-Optimized Plan:
-SortExec: expr=[avg@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]
-  WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }]
-    DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet
-"#
+    Input / Optimized Plan:
+    SortExec: expr=[avg@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]
+      WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }]
+        DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet
+    "#
     )?;
     // =============================================REGION ENDS=============================================
     // = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
@@ -2895,16 +2890,11 @@ SortExec: expr=[avg@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_p
         func: (fn_count_on_unordered.0.clone(), fn_count_on_unordered.1.clone(), fn_count_on_unordered.2.clone()),
         required_sort: ["non_nullable_col", true, false, "count", true, false],
         @ r#"
-Input Plan:
-SortExec: expr=[non_nullable_col@1 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false]
-  WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }]
-    DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet
-
-Optimized Plan:
-SortExec: expr=[non_nullable_col@1 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false]
-  WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }]
-    DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet
-"#
+    Input / Optimized Plan:
+    SortExec: expr=[non_nullable_col@1 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false]
+      WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }]
+        DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet
+    "#
     )?;
 
     // Case 13:
@@ -2914,16 +2904,11 @@ SortExec: expr=[non_nullable_col@1 ASC NULLS LAST, count@2 ASC NULLS LAST], pres
         func: (fn_max_on_unordered.0.clone(), fn_max_on_unordered.1.clone(), fn_max_on_unordered.2.clone()),
         required_sort: ["non_nullable_col", true, false, "max", false, false],
         @ r#"
-Input Plan:
-SortExec: expr=[non_nullable_col@1 ASC NULLS LAST, max@2 DESC NULLS LAST], preserve_partitioning=[false]
-  WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }]
-    DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet
-
-Optimized Plan:
-SortExec: expr=[non_nullable_col@1 ASC NULLS LAST, max@2 DESC NULLS LAST], preserve_partitioning=[false]
-  WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }]
-    DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet
-"#
+    Input / Optimized Plan:
+    SortExec: expr=[non_nullable_col@1 ASC NULLS LAST, max@2 DESC NULLS LAST], preserve_partitioning=[false]
+      WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }]
+        DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet
+    "#
     )?;
 
     // Case 14:
@@ -2933,16 +2918,11 @@ SortExec: expr=[non_nullable_col@1 ASC NULLS LAST, max@2 DESC NULLS LAST], prese
         func: (fn_min_on_unordered.0.clone(), fn_min_on_unordered.1.clone(), fn_min_on_unordered.2.clone()),
         required_sort: ["min", false, false, "non_nullable_col", true, false],
         @ r#"
-Input Plan:
-SortExec: expr=[min@2 DESC NULLS LAST, non_nullable_col@1 ASC NULLS LAST], preserve_partitioning=[false]
-  WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }]
-    DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet
-
-Optimized Plan:
-SortExec: expr=[min@2 DESC NULLS LAST, non_nullable_col@1 ASC NULLS LAST], preserve_partitioning=[false]
-  WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }]
-    DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet
-"#
+    Input / Optimized Plan:
+    SortExec: expr=[min@2 DESC NULLS LAST, non_nullable_col@1 ASC NULLS LAST], preserve_partitioning=[false]
+      WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }]
+        DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet
+    "#
     )?;
 
     // Case 15:
@@ -2952,16 +2932,11 @@ SortExec: expr=[min@2 DESC NULLS LAST, non_nullable_col@1 ASC NULLS LAST], prese
         func: (fn_avg_on_unordered.0.clone(), fn_avg_on_unordered.1.clone(), fn_avg_on_unordered.2.clone()),
         required_sort: ["avg", true, false, "nullable_col", true, false],
         @ r#"
-Input Plan:
-SortExec: expr=[avg@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]
-  WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }]
-    DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet
-
-Optimized Plan:
-SortExec: expr=[avg@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]
-  WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }]
-    DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet
-"#
+    Input / Optimized Plan:
+    SortExec: expr=[avg@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]
+      WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }]
+        DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet
+    "#
     )?;
 
     // =============================================REGION ENDS=============================================
@@ -3029,16 +3004,11 @@ WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: tru
         func: (fn_avg_on_ordered.0.clone(), fn_avg_on_ordered.1.clone(), fn_avg_on_ordered.2.clone()),
        required_sort: ["avg", false, false, "nullable_col", true, false],
         @ r#"
-Input Plan:
-SortExec: expr=[avg@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]
-  WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }]
-    DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet
-
-Optimized Plan:
-SortExec: expr=[avg@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]
-  WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }]
-    DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet
-"#
+    Input / Optimized Plan:
+    SortExec: expr=[avg@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]
+      WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }]
+        DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet
+    "#
     )?;
     // =============================================REGION ENDS=============================================
     // = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
@@ -3051,16 +3021,11 @@ SortExec: expr=[avg@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_
         func: (fn_count_on_unordered.0.clone(), fn_count_on_unordered.1.clone(), fn_count_on_unordered.2.clone()),
         required_sort: ["nullable_col", true, false, "count", true, false],
         @ r#"
-Input Plan:
-SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false]
-  WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }]
-    DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet
-
-Optimized Plan:
-SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false]
-  WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }]
-    DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet
-"#
+    Input / Optimized Plan:
+    SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false]
+      WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }]
+        DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet
+    "#
     )?;
 
     // Case 21:
@@ -3088,16 +3053,11 @@ WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: tru
         func: (fn_min_on_unordered.0.clone(), fn_min_on_unordered.1.clone(), fn_min_on_unordered.2.clone()),
         required_sort: ["min", true, false, "nullable_col", true, false],
         @ r#"
-Input Plan:
-SortExec: expr=[min@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]
-  WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }]
-    DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet
-
-Optimized Plan:
-SortExec: expr=[min@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]
-  WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }]
-    DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet
-"#
+    Input / Optimized Plan:
+    SortExec: expr=[min@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]
+      WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }]
+        DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet
+    "#
     )?;
 
     // Case 23:
@@ -3107,16 +3067,11 @@ SortExec: expr=[min@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_p
         func: (fn_avg_on_unordered.0.clone(), fn_avg_on_unordered.1.clone(), fn_avg_on_unordered.2.clone()),
         required_sort: ["avg", false, false, "nullable_col", true, false],
         @ r#"
-Input Plan:
-SortExec: expr=[avg@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]
-  WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }]
-    DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet
-
-Optimized Plan:
-SortExec: expr=[avg@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]
-  WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }]
-    DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet
-"#
+    Input / Optimized Plan:
+    SortExec: expr=[avg@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]
+      WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }]
+        DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet
+    "#
     )?;
     // =============================================REGION ENDS=============================================
     // = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
@@ -3147,16 +3102,11 @@ WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable:
         func: (fn_max_on_ordered.0.clone(), fn_max_on_ordered.1.clone(), fn_max_on_ordered.2.clone()),
         required_sort: ["nullable_col", true, false, "max", true, false],
         @ r#"
-Input Plan:
-SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 ASC NULLS LAST], preserve_partitioning=[false]
-  WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }]
-    DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet
-
-Optimized Plan:
-SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 ASC NULLS LAST], preserve_partitioning=[false]
-  WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }]
-    DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet
-"#
+    Input / Optimized Plan:
+    SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 ASC NULLS LAST], preserve_partitioning=[false]
+      WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }]
+        DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet
+    "#
     )?;
 
     // Case 26:
@@ -3166,16 +3116,11 @@ SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 ASC NULLS LAST], preserve_p
         func: (fn_min_on_ordered.0.clone(), fn_min_on_ordered.1.clone(), fn_min_on_ordered.2.clone()),
         required_sort: ["min", false, false],
         @ r#"
-Input Plan:
-SortExec: expr=[min@2 DESC NULLS LAST], preserve_partitioning=[false]
-  WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }]
-    DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet
-
-Optimized Plan:
-SortExec: expr=[min@2 DESC NULLS LAST], preserve_partitioning=[false]
-  WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }]
-    DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet
-"#
+    Input / Optimized Plan:
+    SortExec: expr=[min@2 DESC NULLS LAST], preserve_partitioning=[false]
+      WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }]
+        DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet
+    "#
     )?;
 
     // Case 27:
@@ -3185,16 +3130,11 @@ SortExec: expr=[min@2 DESC NULLS LAST], preserve_partitioning=[false]
         func: (fn_avg_on_ordered.0.clone(), fn_avg_on_ordered.1.clone(), fn_avg_on_ordered.2.clone()),
         required_sort: ["avg", false, false],
         @ r#"
-Input Plan:
-SortExec: expr=[avg@2 DESC NULLS LAST], preserve_partitioning=[false]
-  WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }]
-    DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet
-
-Optimized Plan:
-SortExec: expr=[avg@2 DESC NULLS LAST], preserve_partitioning=[false]
-  WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }]
-    DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet
-"#
+    Input / Optimized Plan:
+    SortExec: expr=[avg@2 DESC NULLS LAST], preserve_partitioning=[false]
+      WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }]
+        DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet
+    "#
     )?;
     // =============================================REGION ENDS=============================================
     // = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
@@ -3207,16 +3147,11 @@ SortExec: expr=[avg@2 DESC NULLS LAST], preserve_partitioning=[false]
         func: (fn_count_on_unordered.0.clone(), fn_count_on_unordered.1.clone(), fn_count_on_unordered.2.clone()),
         required_sort: ["count", false, false, "nullable_col", true, false],
         @ r#"
-Input Plan:
-SortExec: expr=[count@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]
-  WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }]
-    DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet
-
-Optimized Plan:
-SortExec: expr=[count@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]
-  WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }]
-    DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet
-"#
+    Input / Optimized Plan:
+    SortExec: expr=[count@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]
+      WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }]
+        DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet
+    "#
     )?;
 
     // Case 29:
@@ -3244,16 +3179,11 @@ WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: tru
         func: (fn_min_on_unordered.0.clone(), fn_min_on_unordered.1.clone(), fn_min_on_unordered.2.clone()),
         required_sort: ["min", false, false],
         @ r#"
-Input Plan:
-SortExec: expr=[min@2 DESC NULLS LAST], preserve_partitioning=[false]
-  WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }]
-    DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet
-
-Optimized Plan:
-SortExec: expr=[min@2 DESC NULLS LAST], preserve_partitioning=[false]
-  WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }]
-    DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet
-"#
+    Input / Optimized Plan:
+    SortExec: expr=[min@2 DESC NULLS LAST], preserve_partitioning=[false]
+      WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }]
+        DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet
+    "#
     )?;
 
     // Case 31:
@@ -3263,16 +3193,11 @@ SortExec: expr=[min@2 DESC NULLS LAST], preserve_partitioning=[false]
         func: (fn_avg_on_unordered.0.clone(), fn_avg_on_unordered.1.clone(), fn_avg_on_unordered.2.clone()),
         required_sort: ["nullable_col", true, false, "avg", true, false],
         @ r#"
-Input Plan:
-SortExec: expr=[nullable_col@0 ASC NULLS LAST, avg@2 ASC NULLS LAST], preserve_partitioning=[false]
-  WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }]
-    DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet
-
-Optimized Plan:
-SortExec: expr=[nullable_col@0 ASC NULLS LAST, avg@2 ASC NULLS LAST], preserve_partitioning=[false]
-  WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }]
-    DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet
-"#
+    Input / Optimized Plan:
+    SortExec: expr=[nullable_col@0 ASC NULLS LAST, avg@2 ASC NULLS LAST], preserve_partitioning=[false]
+      WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }]
+        DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet
+    "#
     )?;
 
     // =============================================REGION ENDS=============================================
@@ -3304,16 +3229,11 @@ BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nulla
         func: (fn_max_on_ordered.0.clone(), fn_max_on_ordered.1.clone(), fn_max_on_ordered.2.clone()),
         required_sort: ["max", false, false, "nullable_col", true, false],
         @ r#"
-Input Plan:
-SortExec: expr=[max@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]
-  BoundedWindowAggExec: wdw=[max: Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]
-    DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet
-
-Optimized Plan:
-SortExec: expr=[max@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]
-  BoundedWindowAggExec: wdw=[max: Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]
-    DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet
-"#
+    Input / Optimized Plan:
+    SortExec: expr=[max@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]
+      BoundedWindowAggExec: wdw=[max: Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]
+        DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet
+    "#
     )?;
 
     // Case 34:
@@ -3341,16 +3261,11 @@ BoundedWindowAggExec: wdw=[min: Field { name: "min", data_type: Int32, nullable:
         func: (fn_avg_on_ordered.0.clone(), fn_avg_on_ordered.1.clone(), fn_avg_on_ordered.2.clone()),
         required_sort: ["nullable_col", true, false, "avg", true, false],
         @ r#"
-Input Plan:
-SortExec: expr=[nullable_col@0 ASC NULLS LAST, avg@2 ASC NULLS LAST], preserve_partitioning=[false]
-  BoundedWindowAggExec: wdw=[avg: Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]
-    DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet
-
-Optimized Plan:
-SortExec: expr=[nullable_col@0 ASC NULLS LAST, avg@2 ASC NULLS LAST], preserve_partitioning=[false]
-  BoundedWindowAggExec: wdw=[avg: Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]
-    DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet
-"#
+    Input / Optimized Plan:
+    SortExec: expr=[nullable_col@0 ASC NULLS LAST, avg@2 ASC NULLS LAST], preserve_partitioning=[false]
+      BoundedWindowAggExec: wdw=[avg: Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]
+        DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet
+    "#
     )?;
     // =============================================REGION ENDS=============================================
     // = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
@@ -3399,16 +3314,11 @@ BoundedWindowAggExec: wdw=[max: Field { name: "max", data_type: Int32, nullable:
         func: (fn_min_on_unordered.0.clone(), fn_min_on_unordered.1.clone(), fn_min_on_unordered.2.clone()),
         required_sort: ["min", false, true, "nullable_col", true, false],
         @ r#"
-Input Plan:
-SortExec: expr=[min@2 DESC, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]
-  BoundedWindowAggExec: wdw=[min: Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]
-    DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet
-
-Optimized Plan:
-SortExec: expr=[min@2 DESC, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]
-  BoundedWindowAggExec: wdw=[min: Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]
-    DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet
-"#
+    Input / Optimized Plan:
+    SortExec: expr=[min@2 DESC, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]
+      BoundedWindowAggExec: wdw=[min: Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]
+        DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet
+    "#
     )?;
 
     // Case 39:
@@ -3418,16 +3328,11 @@ SortExec: expr=[min@2 DESC, nullable_col@0 ASC NULLS LAST], preserve_partitionin
         func: (fn_avg_on_unordered.0.clone(), fn_avg_on_unordered.1.clone(), fn_avg_on_unordered.2.clone()),
         required_sort: ["avg", true, false],
         @ r#"
-Input Plan:
-SortExec: expr=[avg@2 ASC NULLS LAST], preserve_partitioning=[false]
-  BoundedWindowAggExec: wdw=[avg: Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]
-    DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet
-
-Optimized Plan:
-SortExec: expr=[avg@2 ASC NULLS LAST], preserve_partitioning=[false]
-  BoundedWindowAggExec: wdw=[avg: Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]
-    DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet
-"#
+    Input / Optimized Plan:
+    SortExec: expr=[avg@2 ASC NULLS LAST], preserve_partitioning=[false]
+      BoundedWindowAggExec: wdw=[avg: Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]
+        DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet
+    "#
     )?;
     // =============================================REGION ENDS=============================================
     // = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
@@ -3458,16 +3363,11 @@ BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nulla
         func: (fn_max_on_ordered.0.clone(), fn_max_on_ordered.1.clone(), fn_max_on_ordered.2.clone()),
         required_sort: ["max", true, false, "nullable_col", true, false],
         @ r#"
-Input Plan:
-SortExec: expr=[max@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]
-  BoundedWindowAggExec: wdw=[max: Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]
-    DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet
-
-Optimized Plan:
-SortExec: expr=[max@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]
-  BoundedWindowAggExec: wdw=[max: Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]
-    DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet
-"#
+    Input / Optimized Plan:
+    SortExec: expr=[max@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]
+      BoundedWindowAggExec: wdw=[max: Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]
+        DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet
+    "#
     )?;
 
     // Case 42:
@@ -3477,16 +3377,11 @@ SortExec: expr=[max@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_p
         func: (fn_min_on_ordered.0.clone(), fn_min_on_ordered.1.clone(), fn_min_on_ordered.2.clone()),
         required_sort: ["min", false, false, "nullable_col", true, false],
         @ r#"
-Input Plan:
-SortExec: expr=[min@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]
-  BoundedWindowAggExec: wdw=[min: Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]
-    DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet
-
-Optimized Plan:
-SortExec: expr=[min@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false]
-  BoundedWindowAggExec: wdw=[min: Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted]
-    DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet
-"# + Input / Optimized Plan: + SortExec: expr=[min@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[min: Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# )?; // Case 43: @@ -3496,16 +3391,11 @@ SortExec: expr=[min@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_ func: (fn_avg_on_ordered.0.clone(), fn_avg_on_ordered.1.clone(), fn_avg_on_ordered.2.clone()), required_sort: ["nullable_col", true, false, "avg", true, false], @ r#" -Input Plan: -SortExec: expr=[nullable_col@0 ASC NULLS LAST, avg@2 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[avg: Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - -Optimized Plan: -SortExec: expr=[nullable_col@0 ASC NULLS LAST, avg@2 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[avg: Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# + Input / Optimized Plan: + SortExec: expr=[nullable_col@0 ASC NULLS LAST, avg@2 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[avg: Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# )?; // =============================================REGION ENDS============================================= @@ -3519,16 +3409,11 @@ SortExec: expr=[nullable_col@0 ASC NULLS LAST, avg@2 ASC NULLS LAST], preserve_p func: (fn_count_on_unordered.0.clone(), fn_count_on_unordered.1.clone(), fn_count_on_unordered.2.clone()), required_sort: ["count", true, true], @ r#" -Input Plan: -SortExec: expr=[count@2 ASC], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - -Optimized Plan: -SortExec: expr=[count@2 ASC], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], 
file_type=parquet -"# + Input / Optimized Plan: + SortExec: expr=[count@2 ASC], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# )?; // Case 45: @@ -3538,16 +3423,11 @@ SortExec: expr=[count@2 ASC], preserve_partitioning=[false] func: (fn_max_on_unordered.0.clone(), fn_max_on_unordered.1.clone(), fn_max_on_unordered.2.clone()), required_sort: ["nullable_col", true, false, "max", false, false], @ r#" -Input Plan: -SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 DESC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[max: Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - -Optimized Plan: -SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 DESC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[max: Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# + Input / Optimized Plan: + SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 DESC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[max: Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# )?; // Case 46: @@ -3615,16 +3495,11 @@ BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nulla func: (fn_max_on_ordered.0.clone(), fn_max_on_ordered.1.clone(), fn_max_on_ordered.2.clone()), required_sort: ["max", true, false], @ r#" -Input Plan: -SortExec: expr=[max@2 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[max: Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - -Optimized Plan: -SortExec: expr=[max@2 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[max: Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# + Input / Optimized Plan: + SortExec: expr=[max@2 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: 
wdw=[max: Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# )?; // Case 50: @@ -3652,16 +3527,11 @@ BoundedWindowAggExec: wdw=[min: Field { name: "min", data_type: Int32, nullable: func: (fn_avg_on_ordered.0.clone(), fn_avg_on_ordered.1.clone(), fn_avg_on_ordered.2.clone()), required_sort: ["avg", true, false], @ r#" -Input Plan: -SortExec: expr=[avg@2 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[avg: Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - -Optimized Plan: -SortExec: expr=[avg@2 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[avg: Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# + Input / Optimized Plan: + SortExec: expr=[avg@2 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[avg: Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# )?; // =============================================REGION ENDS============================================= @@ -3675,16 +3545,11 @@ SortExec: expr=[avg@2 ASC NULLS LAST], preserve_partitioning=[false] func: (fn_count_on_unordered.0.clone(), fn_count_on_unordered.1.clone(), fn_count_on_unordered.2.clone()), required_sort: ["count", true, false, "nullable_col", true, false], @ r#" -Input Plan: -SortExec: expr=[count@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - -Optimized Plan: -SortExec: expr=[count@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# + Input / Optimized Plan: + SortExec: expr=[count@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: 
Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# )?; // Case 53: @@ -3694,16 +3559,11 @@ SortExec: expr=[count@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve func: (fn_max_on_unordered.0.clone(), fn_max_on_unordered.1.clone(), fn_max_on_unordered.2.clone()), required_sort: ["nullable_col", true, false, "max", true, false], @ r#" -Input Plan: -SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[max: Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - -Optimized Plan: -SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[max: Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# + Input / Optimized Plan: + SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[max: Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# )?; // Case 54: @@ -3713,16 +3573,11 @@ SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 ASC NULLS LAST], preserve_p func: (fn_min_on_unordered.0.clone(), fn_min_on_unordered.1.clone(), fn_min_on_unordered.2.clone()), required_sort: ["min", true, false], @ r#" -Input Plan: -SortExec: expr=[min@2 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[min: Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - -Optimized Plan: -SortExec: expr=[min@2 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[min: Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# + Input / Optimized Plan: + SortExec: expr=[min@2 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[min: Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] + 
DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# )?; // Case 55: @@ -3772,16 +3627,11 @@ BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nulla func: (fn_max_on_ordered.0.clone(), fn_max_on_ordered.1.clone(), fn_max_on_ordered.2.clone()), required_sort: ["nullable_col", true, false, "max", true, false], @ r#" -Input Plan: -SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[max: Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - -Optimized Plan: -SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[max: Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# + Input / Optimized Plan: + SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[max: Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# )?; // Case 58: @@ -3791,16 +3641,11 @@ SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 ASC NULLS LAST], preserve_p func: (fn_min_on_ordered.0.clone(), fn_min_on_ordered.1.clone(), fn_min_on_ordered.2.clone()), required_sort: ["min", false, false, "nullable_col", true, false], @ r#" -Input Plan: -SortExec: expr=[min@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[min: Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - -Optimized Plan: -SortExec: expr=[min@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[min: Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# + Input / Optimized Plan: + SortExec: expr=[min@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[min: Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: 
[[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# )?; // Case 59: @@ -3810,16 +3655,11 @@ SortExec: expr=[min@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_ func: (fn_avg_on_ordered.0.clone(), fn_avg_on_ordered.1.clone(), fn_avg_on_ordered.2.clone()), required_sort: ["avg", true, false], @ r#" -Input Plan: -SortExec: expr=[avg@2 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[avg: Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - -Optimized Plan: -SortExec: expr=[avg@2 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[avg: Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# + Input / Optimized Plan: + SortExec: expr=[avg@2 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[avg: Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# )?; // =============================================REGION ENDS============================================= // = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = @@ -3832,16 +3672,11 @@ SortExec: expr=[avg@2 ASC NULLS LAST], preserve_partitioning=[false] func: (fn_count_on_unordered.0.clone(), fn_count_on_unordered.1.clone(), fn_count_on_unordered.2.clone()), required_sort: ["nullable_col", true, false, "count", true, false], @ r#" -Input Plan: -SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - -Optimized Plan: -SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# + Input / Optimized Plan: + SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT 
ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# )?; // Case 61: @@ -3851,16 +3686,11 @@ SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve func: (fn_max_on_unordered.0.clone(), fn_max_on_unordered.1.clone(), fn_max_on_unordered.2.clone()), required_sort: ["nullable_col", true, false, "max", true, true], @ r#" -Input Plan: -SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 ASC], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[max: Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - -Optimized Plan: -SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 ASC], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[max: Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# + Input / Optimized Plan: + SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 ASC], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[max: Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# )?; // Case 62: @@ -3870,16 +3700,11 @@ SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 ASC], preserve_partitioning func: (fn_min_on_unordered.0.clone(), fn_min_on_unordered.1.clone(), fn_min_on_unordered.2.clone()), required_sort: ["nullable_col", true, false, "min", false, false], @ r#" -Input Plan: -SortExec: expr=[nullable_col@0 ASC NULLS LAST, min@2 DESC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[min: Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - -Optimized Plan: -SortExec: expr=[nullable_col@0 ASC NULLS LAST, min@2 DESC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[min: Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# + Input / Optimized Plan: + SortExec: expr=[nullable_col@0 ASC NULLS LAST, min@2 DESC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[min: Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: 
[[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# )?; // Case 63: From d4fcfbdea4f4737aafa10df9afb593a164126cdf Mon Sep 17 00:00:00 2001 From: blaginin Date: Sat, 4 Oct 2025 21:15:57 +0100 Subject: [PATCH 14/14] Kill `expected_description` --- .../physical_optimizer/enforce_sorting.rs | 131 ++++++------------ 1 file changed, 40 insertions(+), 91 deletions(-) diff --git a/datafusion/core/tests/physical_optimizer/enforce_sorting.rs b/datafusion/core/tests/physical_optimizer/enforce_sorting.rs index f6bd27f363282..a2c604a84e76f 100644 --- a/datafusion/core/tests/physical_optimizer/enforce_sorting.rs +++ b/datafusion/core/tests/physical_optimizer/enforce_sorting.rs @@ -96,8 +96,6 @@ fn csv_exec_sorted( struct EnforceSortingTest { plan: Arc, repartition_sorts: bool, - /// A message printed into the snapshot to describe the expected output - expected_description: Option, } impl EnforceSortingTest { @@ -105,7 +103,6 @@ impl EnforceSortingTest { Self { plan, repartition_sorts: false, - expected_description: None, } } @@ -115,12 +112,6 @@ impl EnforceSortingTest { self } - /// Add an expected output description - fn with_expected_description(mut self, description: &str) -> Self { - self.expected_description = Some(description.to_string()); - self - } - /// Runs the enforce sorting test and returns a string with the input and /// optimized plan as strings for snapshot comparison using insta fn run(&self) -> String { @@ -193,22 +184,13 @@ impl EnforceSortingTest { .indent(true) .to_string(); - let expected_description = - if let Some(desc) = self.expected_description.as_deref() { - format!("{desc}\n") - } else { - "".to_string() - }; - if input_plan_string == optimized_plan_string { - return format!( - "{expected_description}Input / Optimized Plan:\n{input_plan_string}", - ); - } - - format!( - "Input Plan:\n{input_plan_string}\n{expected_description}Optimized Plan:\n{optimized_plan_string}", + format!("Input / Optimized Plan:\n{input_plan_string}",) + } else { + format!( + "Input Plan:\n{input_plan_string}\nOptimized Plan:\n{optimized_plan_string}", ) + } } } @@ -260,9 +242,8 @@ async fn test_do_not_remove_sort_with_limit() -> Result<()> { let repartition = repartition_exec(union); let physical_plan = sort_preserving_merge_exec(ordering, repartition); - let test = EnforceSortingTest::new(physical_plan) - .with_repartition_sorts(true) - .with_expected_description("// We should keep the bottom `SortExec`."); + let test = EnforceSortingTest::new(physical_plan).with_repartition_sorts(true); + assert_snapshot!(test.run(), @r" Input Plan: SortPreservingMergeExec: [nullable_col@0 ASC, non_nullable_col@1 ASC] @@ -273,7 +254,6 @@ async fn test_do_not_remove_sort_with_limit() -> Result<()> { SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false] DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet - // We should keep the bottom `SortExec`. Optimized Plan: SortPreservingMergeExec: [nullable_col@0 ASC, non_nullable_col@1 ASC] SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[true] @@ -284,7 +264,7 @@ async fn test_do_not_remove_sort_with_limit() -> Result<()> { SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false] DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet "); - + // We should keep the bottom `SortExec`. 
     Ok(())
 }
 
@@ -299,12 +279,9 @@ async fn test_union_inputs_sorted() -> Result<()> {
     let physical_plan = sort_preserving_merge_exec(ordering, union);
 
     // one input to the union is already sorted, one is not.
-    let test = EnforceSortingTest::new(physical_plan)
-        .with_repartition_sorts(true)
-        .with_expected_description("// should not add a sort at the output of the union, input plan should not be changed");
+    let test = EnforceSortingTest::new(physical_plan).with_repartition_sorts(true);
     assert_snapshot!(test.run(), @r"
-    // should not add a sort at the output of the union, input plan should not be changed
     Input / Optimized Plan:
     SortPreservingMergeExec: [nullable_col@0 ASC]
       UnionExec
         DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC], file_type=parquet
         SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]
           DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet
     ");
+    // should not add a sort at the output of the union; the input plan should not be changed
     Ok(())
 }
 
@@ -332,11 +310,9 @@ async fn test_union_inputs_different_sorted() -> Result<()> {
     let physical_plan = sort_preserving_merge_exec(ordering, union);
 
     // one input to the union is already sorted, one is not.
-    let test = EnforceSortingTest::new(physical_plan)
-        .with_repartition_sorts(true)
-        .with_expected_description("// should not add a sort at the output of the union, input plan should not be changed");
+    let test = EnforceSortingTest::new(physical_plan).with_repartition_sorts(true);
+
     assert_snapshot!(test.run(), @r"
-    // should not add a sort at the output of the union, input plan should not be changed
     Input / Optimized Plan:
     SortPreservingMergeExec: [nullable_col@0 ASC]
       UnionExec
         DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC], file_type=parquet
         SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]
           DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet
     ");
+    // should not add a sort at the output of the union; the input plan should not be changed
     Ok(())
 }
 
@@ -407,10 +384,7 @@ async fn test_union_inputs_different_sorted3() -> Result<()> {
     // First input to the union is not Sorted (its SortExec is finer than the ordering required by the SortPreservingMergeExec above).
     // Second input to the union is already Sorted (it matches the ordering required by the SortPreservingMergeExec above).
     // Third input to the union is not Sorted (its SortExec matches the ordering required by the SortPreservingMergeExec above).
-    let test = EnforceSortingTest::new(physical_plan).with_repartition_sorts(true)
-        .with_expected_description(
-            "// should adjust sorting in the first input of the union such that it is not unnecessarily fine"
-        );
+    let test = EnforceSortingTest::new(physical_plan).with_repartition_sorts(true);
     assert_snapshot!(test.run(), @r"
     Input Plan:
     SortPreservingMergeExec: [nullable_col@0 ASC]
       UnionExec
         SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]
           DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet
         DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC], file_type=parquet
         SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]
           DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet
 
-    // should adjust sorting in the first input of the union such that it is not unnecessarily fine
     Optimized Plan:
     SortPreservingMergeExec: [nullable_col@0 ASC]
       UnionExec
         SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]
           DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet
         DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC], file_type=parquet
         SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]
           DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet
     ");
+    // should adjust sorting in the first input of the union such that it is not unnecessarily fine
     Ok(())
 }
 
@@ -556,12 +530,7 @@ async fn test_union_inputs_different_sorted6() -> Result<()> {
     // At the same time, this ordering requirement is unnecessarily fine.
     // The final plan should be valid AND the ordering of the third child
     // shouldn't be finer than necessary.
-    let test = EnforceSortingTest::new(physical_plan)
-        .with_repartition_sorts(true)
-        .with_expected_description(
-            "// Should adjust the requirement in the third input of the union so\n\
-             // that it is not unnecessarily fine.",
-        );
+    let test = EnforceSortingTest::new(physical_plan).with_repartition_sorts(true);
     assert_snapshot!(test.run(), @r"
     Input Plan:
     SortPreservingMergeExec: [nullable_col@0 ASC]
       UnionExec
         SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]
           DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet
         DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC], file_type=parquet
         SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]
           RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1
             DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet
 
-    // Should adjust the requirement in the third input of the union so
-    // that it is not unnecessarily fine.
     Optimized Plan:
     SortPreservingMergeExec: [nullable_col@0 ASC]
       UnionExec
         SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]
           DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet
         DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC], file_type=parquet
         SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]
           RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1
             DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet
     ");
+    // Should adjust the requirement in the third input of the union so
+    // that it is not unnecessarily fine.
     Ok(())
 }
 
@@ -605,8 +574,7 @@ async fn test_union_inputs_different_sorted7() -> Result<()> {
     let physical_plan = sort_preserving_merge_exec(ordering2, union);
 
     // Union has unnecessarily fine orderings below it. We should be able to replace them with the absolutely necessary ordering.
-    let test = EnforceSortingTest::new(physical_plan).with_repartition_sorts(true)
-        .with_expected_description("// Union preserves the inputs ordering and we should not change any of the SortExecs under UnionExec");
+    let test = EnforceSortingTest::new(physical_plan).with_repartition_sorts(true);
     assert_snapshot!(test.run(), @r"
     Input Plan:
     SortPreservingMergeExec: [nullable_col@0 ASC]
       UnionExec
         SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]
           DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet
         SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]
           DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet
 
-    // Union preserves the inputs ordering and we should not change any of the SortExecs under UnionExec
     Optimized Plan:
     SortPreservingMergeExec: [nullable_col@0 ASC]
       UnionExec
         SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]
           DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet
         SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]
           DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet
     ");
+    // Union preserves the inputs' ordering, and we should not change any of the SortExecs under UnionExec
     Ok(())
 }
 
@@ -663,12 +631,7 @@ async fn test_union_inputs_different_sorted8() -> Result<()> {
 
     // The `UnionExec` doesn't preserve any of the inputs' ordering in the
     // example below.
-    let test = EnforceSortingTest::new(physical_plan)
-        .with_repartition_sorts(true)
-        .with_expected_description(
-            "// Since `UnionExec` doesn't preserve ordering in the plan above.\n\
-             // We shouldn't keep SortExecs in the plan.",
-        );
+    let test = EnforceSortingTest::new(physical_plan).with_repartition_sorts(true);
     assert_snapshot!(test.run(), @r"
     Input Plan:
     UnionExec
       SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]
         DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet
       SortExec: expr=[nullable_col@0 DESC NULLS LAST, non_nullable_col@1 DESC NULLS LAST], preserve_partitioning=[false]
         DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet
 
-    // Since `UnionExec` doesn't preserve ordering in the plan above.
-    // We shouldn't keep SortExecs in the plan.
     Optimized Plan:
     UnionExec
       DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet
       DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet
     ");
+    // Since `UnionExec` doesn't preserve ordering in the plan above,
+    // we shouldn't keep SortExecs in the plan.
     Ok(())
 }
 
@@ -1464,11 +1427,8 @@ async fn test_sort_merge_join_complex_order_by() -> Result<()> {
     .into();
     let physical_plan = sort_preserving_merge_exec(ordering, join.clone());
 
-    let test = EnforceSortingTest::new(physical_plan)
-        .with_repartition_sorts(true)
-        .with_expected_description(
-            "// can not push down the sort requirements, need to add SortExec",
-        );
+    let test = EnforceSortingTest::new(physical_plan).with_repartition_sorts(true);
+
     assert_snapshot!(test.run(), @r"
     Input Plan:
     SortPreservingMergeExec: [col_b@3 ASC, col_a@2 ASC]
       SortMergeJoin: join_type=Inner, on=[(nullable_col@0, col_a@0)]
         DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet
         DataSourceExec: file_groups={1 group: [[x]]}, projection=[col_a, col_b], file_type=parquet
 
-    // can not push down the sort requirements, need to add SortExec
     Optimized Plan:
     SortExec: expr=[col_b@3 ASC, nullable_col@0 ASC], preserve_partitioning=[false]
       SortMergeJoin: join_type=Inner, on=[(nullable_col@0, col_a@0)]
         SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]
           DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet
         SortExec: expr=[col_a@0 ASC], preserve_partitioning=[false]
           DataSourceExec: file_groups={1 group: [[x]]}, projection=[col_a, col_b], file_type=parquet
     ");
+    // cannot push down the sort requirements; need to add a SortExec
 
     // order by (nullable_col, col_b, col_a)
     let ordering2 = [
...
     ]
     .into();
     let physical_plan = sort_preserving_merge_exec(ordering2, join);
-    let test = EnforceSortingTest::new(physical_plan)
-        .with_repartition_sorts(true)
-        .with_expected_description(
-            "// Can push down the sort requirements since col_a = nullable_col",
-        );
+    let test = EnforceSortingTest::new(physical_plan).with_repartition_sorts(true);
+
     assert_snapshot!(test.run(), @r"
     Input Plan:
     SortPreservingMergeExec: [nullable_col@0 ASC, col_b@3 ASC, col_a@2 ASC]
       SortMergeJoin: join_type=Inner, on=[(nullable_col@0, col_a@0)]
         DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet
         DataSourceExec: file_groups={1 group: [[x]]}, projection=[col_a, col_b], file_type=parquet
 
-    // Can push down the sort requirements since col_a = nullable_col
     Optimized Plan:
     SortMergeJoin: join_type=Inner, on=[(nullable_col@0, col_a@0)]
       SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]
         DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet
       SortExec: expr=[col_a@0 ASC, col_b@1 ASC], preserve_partitioning=[false]
         DataSourceExec: file_groups={1 group: [[x]]}, projection=[col_a, col_b], file_type=parquet
     ");
+    // Can push down the sort requirements since col_a = nullable_col
     Ok(())
 }
 
@@ -2465,9 +2422,8 @@ async fn test_push_with_required_input_ordering_allowed() -> Result<()> {
         " DataSourceExec: partitions=1, partition_sizes=[0]",
     ];
     */
-    let test = EnforceSortingTest::new(plan.clone())
-        .with_repartition_sorts(true)
-        .with_expected_description("// Should be able to push down");
+    let test = EnforceSortingTest::new(plan.clone()).with_repartition_sorts(true);
+
     assert_snapshot!(test.run(), @r"
     Input Plan:
     SortExec: expr=[a@0 ASC, b@1 ASC], preserve_partitioning=[false]
       RequiredInputOrderingExec
         SortExec: expr=[a@0 ASC], preserve_partitioning=[false]
           DataSourceExec: partitions=1, partition_sizes=[0]
 
-    // Should be able to push down
     Optimized Plan:
     RequiredInputOrderingExec
       SortExec: expr=[a@0 ASC, b@1 ASC], preserve_partitioning=[false]
         DataSourceExec: partitions=1, partition_sizes=[0]
     ");
+    // Should be able to push down
     Ok(())
 }
 
@@ -3738,11 +3694,7 @@ fn test_removes_unused_orthogonal_sort() -> Result<()> {
     let output_sort = sort_exec(input_ordering, orthogonal_sort); // same sort as data source
 
     // Test scenario/input has an orthogonal sort:
-    let test = EnforceSortingTest::new(output_sort)
-        .with_repartition_sorts(true)
-        .with_expected_description(
-            "// Test: should remove orthogonal sort, and the uppermost (unneeded) sort:",
-        );
+    let test = EnforceSortingTest::new(output_sort).with_repartition_sorts(true);
     assert_snapshot!(test.run(), @r"
     Input Plan:
     SortExec: expr=[b@1 ASC, c@2 ASC], preserve_partitioning=[false]
       SortExec: expr=[a@0 ASC], preserve_partitioning=[false]
         StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[b@1 ASC, c@2 ASC]
 
-    // Test: should remove orthogonal sort, and the uppermost (unneeded) sort:
     Optimized Plan:
     StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[b@1 ASC, c@2 ASC]
     ");
+    // Test: should remove the orthogonal sort and the uppermost (unneeded) sort.
     Ok(())
 }
 
@@ -3769,19 +3721,16 @@ fn test_keeps_used_orthogonal_sort() -> Result<()> {
     let output_sort = sort_exec(input_ordering, orthogonal_sort);
 
     // Test scenario/input has an orthogonal sort:
-    let test = EnforceSortingTest::new(output_sort)
-        .with_repartition_sorts(true)
-        .with_expected_description(
-            "// Test: should keep the orthogonal sort, since it modifies the output:",
-        );
+    let test = EnforceSortingTest::new(output_sort).with_repartition_sorts(true);
     assert_snapshot!(test.run(), @r"
-    // Test: should keep the orthogonal sort, since it modifies the output:
     Input / Optimized Plan:
     SortExec: expr=[b@1 ASC, c@2 ASC], preserve_partitioning=[false]
       SortExec: TopK(fetch=3), expr=[a@0 ASC], preserve_partitioning=[false]
         StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[b@1 ASC, c@2 ASC]
     ");
+    // Test: should keep the orthogonal sort, since it modifies the output.
+
     Ok(())
 }
 
@@ -3801,8 +3750,7 @@ fn test_handles_multiple_orthogonal_sorts() -> Result<()> {
     let output_sort = sort_exec(input_ordering, orthogonal_sort_3); // final sort
 
     // Test scenario/input has an orthogonal sort:
-    let test = EnforceSortingTest::new(output_sort.clone()).with_repartition_sorts(true)
-        .with_expected_description("// Test: should keep only the needed orthogonal sort, and remove the unneeded ones:");
+    let test = EnforceSortingTest::new(output_sort.clone()).with_repartition_sorts(true);
     assert_snapshot!(test.run(), @r"
     Input Plan:
     SortExec: expr=[b@1 ASC, c@2 ASC], preserve_partitioning=[false]
       SortExec: TopK(fetch=3), expr=[a@0 ASC], preserve_partitioning=[false]
         SortExec: expr=[a@0 ASC, b@1 ASC], preserve_partitioning=[false]
           SortExec: expr=[c@2 ASC], preserve_partitioning=[false]
             StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[b@1 ASC, c@2 ASC]
 
-    // Test: should keep only the needed orthogonal sort, and remove the unneeded ones:
     Optimized Plan:
     SortExec: expr=[b@1 ASC, c@2 ASC], preserve_partitioning=[false]
       SortExec: TopK(fetch=3), expr=[a@0 ASC], preserve_partitioning=[false]
         StreamingTableExec: partition_sizes=1, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[b@1 ASC, c@2 ASC]
     ");
+
+    // Test: should keep only the needed orthogonal sort, and remove the unneeded ones.
     Ok(())
 }
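Note for reviewers: after this series, new tests in this file follow the shape sketched below. This is a hedged illustration, not part of the patch; `schema()` is a hypothetical helper, `csv_exec_sorted`, `sort_expr`, and `sort_exec` are this module's existing helpers whose exact signatures are not shown above, and the empty inline snapshot is a placeholder that `cargo insta test --accept` would fill in on first run.

    #[tokio::test]
    async fn demo_enforce_sorting_snapshot_style() -> Result<()> {
        // Build a plan with a redundant sort over an already-sorted source
        // (helper names assumed; see the note above).
        let source = csv_exec_sorted(&schema(), [sort_expr("nullable_col", &schema())].into());
        let physical_plan = sort_exec([sort_expr("nullable_col", &schema())].into(), source);

        // Run EnforceSorting with sort repartitioning enabled, as the tests above do.
        let test = EnforceSortingTest::new(physical_plan).with_repartition_sorts(true);

        // run() renders "Input Plan:" / "Optimized Plan:" (or a combined
        // "Input / Optimized Plan:" when the two are identical) for insta to compare.
        assert_snapshot!(test.run(), @"");

        // Any commentary on the expected output now lives here, outside the snapshot.
        Ok(())
    }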