diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark32/20.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark32/20.txt index 6fcedbbda996..fcb70f9349be 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark32/20.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark32/20.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (107) +AdaptiveSparkPlan (110) +- == Final Plan == VeloxColumnarToRowExec (70) +- ^ SortExecTransformer (68) @@ -56,42 +56,45 @@ AdaptiveSparkPlan (107) +- ^ FilterExecTransformer (54) +- ^ Scan parquet (53) +- == Initial Plan == - Sort (106) - +- Exchange (105) - +- Project (104) - +- BroadcastHashJoin Inner BuildRight (103) - :- Project (98) - : +- ShuffledHashJoin LeftSemi BuildRight (97) - : :- Exchange (73) - : : +- Filter (72) - : : +- Scan parquet (71) - : +- Exchange (96) - : +- Project (95) - : +- BroadcastHashJoin Inner BuildLeft (94) - : :- BroadcastExchange (81) - : : +- BroadcastHashJoin LeftSemi BuildRight (80) - : : :- Filter (75) - : : : +- Scan parquet (74) - : : +- BroadcastExchange (79) - : : +- Project (78) - : : +- Filter (77) - : : +- Scan parquet (76) - : +- Filter (93) - : +- HashAggregate (92) - : +- Exchange (91) - : +- HashAggregate (90) - : +- BroadcastHashJoin LeftSemi BuildRight (89) - : :- Project (84) - : : +- Filter (83) - : : +- Scan parquet (82) - : +- BroadcastExchange (88) - : +- Project (87) - : +- Filter (86) - : +- Scan parquet (85) - +- BroadcastExchange (102) - +- Project (101) - +- Filter (100) - +- Scan parquet (99) + Sort (109) + +- Exchange (108) + +- Project (107) + +- BroadcastHashJoin Inner BuildRight (106) + :- Project (101) + : +- ShuffledHashJoinExecTemp (100) + : +- SortMergeJoin LeftSemi (99) + : :- Sort (74) + : : +- Exchange (73) + : : +- Filter (72) + : : +- Scan parquet (71) + : +- Sort (98) + : +- Exchange (97) + : +- Project (96) + : +- BroadcastHashJoin Inner BuildLeft (95) + : :- BroadcastExchange (82) + : : +- BroadcastHashJoin LeftSemi BuildRight (81) + : : :- Filter (76) + : : : +- Scan parquet (75) + : : +- BroadcastExchange (80) + : : +- Project (79) + : : +- Filter (78) + : : +- Scan parquet (77) + : +- Filter (94) + : +- HashAggregate (93) + : +- Exchange (92) + : +- HashAggregate (91) + : +- BroadcastHashJoin LeftSemi BuildRight (90) + : :- Project (85) + : : +- Filter (84) + : : +- Scan parquet (83) + : +- BroadcastExchange (89) + : +- Project (88) + : +- Filter (87) + : +- Scan parquet (86) + +- BroadcastExchange (105) + +- Project (104) + +- Filter (103) + +- Scan parquet (102) (1) Scan parquet @@ -397,164 +400,176 @@ Condition : isnotnull(s_nationkey#X) Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(74) Scan parquet +(74) Sort +Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(75) Scan parquet Output [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(ps_availqty), IsNotNull(ps_partkey), IsNotNull(ps_suppkey)] ReadSchema: struct -(75) Filter +(76) Filter Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Condition : ((isnotnull(ps_availqty#X) AND isnotnull(ps_partkey#X)) AND isnotnull(ps_suppkey#X)) -(76) Scan parquet +(77) Scan parquet Output [2]: [p_partkey#X, p_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_name), StringStartsWith(p_name,forest)] ReadSchema: struct -(77) Filter +(78) Filter Input [2]: [p_partkey#X, p_name#X] Condition : (isnotnull(p_name#X) AND StartsWith(p_name#X, forest)) -(78) Project +(79) Project Output [1]: [p_partkey#X] Input [2]: [p_partkey#X, p_name#X] -(79) BroadcastExchange +(80) BroadcastExchange Input [1]: [p_partkey#X] Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=X] -(80) BroadcastHashJoin +(81) BroadcastHashJoin Left keys [1]: [ps_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(81) BroadcastExchange +(82) BroadcastExchange Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false], input[1, bigint, false]),false), [plan_id=X] -(82) Scan parquet +(83) Scan parquet Output [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1994-01-01), LessThan(l_shipdate,1995-01-01), IsNotNull(l_partkey), IsNotNull(l_suppkey)] ReadSchema: struct -(83) Filter +(84) Filter Input [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] Condition : ((((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1994-01-01)) AND (l_shipdate#X < 1995-01-01)) AND isnotnull(l_partkey#X)) AND isnotnull(l_suppkey#X)) -(84) Project +(85) Project Output [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Input [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] -(85) Scan parquet +(86) Scan parquet Output [2]: [p_partkey#X, p_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_name), StringStartsWith(p_name,forest)] ReadSchema: struct -(86) Filter +(87) Filter Input [2]: [p_partkey#X, p_name#X] Condition : (isnotnull(p_name#X) AND StartsWith(p_name#X, forest)) -(87) Project +(88) Project Output [1]: [p_partkey#X] Input [2]: [p_partkey#X, p_name#X] -(88) BroadcastExchange +(89) BroadcastExchange Input [1]: [p_partkey#X] Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=X] -(89) BroadcastHashJoin +(90) BroadcastHashJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(90) HashAggregate +(91) HashAggregate Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Keys [2]: [l_partkey#X, l_suppkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] -(91) Exchange +(92) Exchange Input [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_partkey#X, l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(92) HashAggregate +(93) HashAggregate Input [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] Keys [2]: [l_partkey#X, l_suppkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [3]: [CheckOverflow((0.50 * promote_precision(sum(l_quantity#X)#X)), DecimalType(24,3), true) AS (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(93) Filter +(94) Filter Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Condition : isnotnull((0.5 * sum(l_quantity))#X) -(94) BroadcastHashJoin +(95) BroadcastHashJoin Left keys [2]: [ps_partkey#X, ps_suppkey#X] Right keys [2]: [l_partkey#X, l_suppkey#X] Join condition: (cast(cast(ps_availqty#X as decimal(10,0)) as decimal(24,3)) > (0.5 * sum(l_quantity))#X) -(95) Project +(96) Project Output [1]: [ps_suppkey#X] Input [6]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(96) Exchange +(97) Exchange Input [1]: [ps_suppkey#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(97) ShuffledHashJoin +(98) Sort +Input [1]: [ps_suppkey#X] +Arguments: [ps_suppkey#X ASC NULLS FIRST], false, 0 + +(99) SortMergeJoin Left keys [1]: [s_suppkey#X] Right keys [1]: [ps_suppkey#X] Join condition: None -(98) Project +(100) ShuffledHashJoinExecTemp +Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] +Arguments: BuildRight + +(101) Project Output [3]: [s_name#X, s_address#X, s_nationkey#X] Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] -(99) Scan parquet +(102) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,CANADA), IsNotNull(n_nationkey)] ReadSchema: struct -(100) Filter +(103) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = CANADA)) AND isnotnull(n_nationkey#X)) -(101) Project +(104) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(102) BroadcastExchange +(105) BroadcastExchange Input [1]: [n_nationkey#X] Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=X] -(103) BroadcastHashJoin +(106) BroadcastHashJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(104) Project +(107) Project Output [2]: [s_name#X, s_address#X] Input [4]: [s_name#X, s_address#X, s_nationkey#X, n_nationkey#X] -(105) Exchange +(108) Exchange Input [2]: [s_name#X, s_address#X] Arguments: rangepartitioning(s_name#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(106) Sort +(109) Sort Input [2]: [s_name#X, s_address#X] Arguments: [s_name#X ASC NULLS FIRST], true, 0 -(107) AdaptiveSparkPlan +(110) AdaptiveSparkPlan Output [2]: [s_name#X, s_address#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark33/20.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark33/20.txt index 32f28a0311c4..aa84481822d4 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark33/20.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark33/20.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (104) +AdaptiveSparkPlan (107) +- == Final Plan == VeloxColumnarToRowExec (67) +- AQEShuffleRead (66) @@ -55,42 +55,45 @@ AdaptiveSparkPlan (104) +- ^ FilterExecTransformer (54) +- ^ Scan parquet (53) +- == Initial Plan == - Sort (103) - +- Exchange (102) - +- Project (101) - +- BroadcastHashJoin Inner BuildRight (100) - :- Project (95) - : +- ShuffledHashJoin LeftSemi BuildRight (94) - : :- Exchange (70) - : : +- Filter (69) - : : +- Scan parquet (68) - : +- Exchange (93) - : +- Project (92) - : +- BroadcastHashJoin Inner BuildLeft (91) - : :- BroadcastExchange (78) - : : +- BroadcastHashJoin LeftSemi BuildRight (77) - : : :- Filter (72) - : : : +- Scan parquet (71) - : : +- BroadcastExchange (76) - : : +- Project (75) - : : +- Filter (74) - : : +- Scan parquet (73) - : +- Filter (90) - : +- HashAggregate (89) - : +- Exchange (88) - : +- HashAggregate (87) - : +- BroadcastHashJoin LeftSemi BuildRight (86) - : :- Project (81) - : : +- Filter (80) - : : +- Scan parquet (79) - : +- BroadcastExchange (85) - : +- Project (84) - : +- Filter (83) - : +- Scan parquet (82) - +- BroadcastExchange (99) - +- Project (98) - +- Filter (97) - +- Scan parquet (96) + Sort (106) + +- Exchange (105) + +- Project (104) + +- BroadcastHashJoin Inner BuildRight (103) + :- Project (98) + : +- ShuffledHashJoinExecTemp (97) + : +- SortMergeJoin LeftSemi (96) + : :- Sort (71) + : : +- Exchange (70) + : : +- Filter (69) + : : +- Scan parquet (68) + : +- Sort (95) + : +- Exchange (94) + : +- Project (93) + : +- BroadcastHashJoin Inner BuildLeft (92) + : :- BroadcastExchange (79) + : : +- BroadcastHashJoin LeftSemi BuildRight (78) + : : :- Filter (73) + : : : +- Scan parquet (72) + : : +- BroadcastExchange (77) + : : +- Project (76) + : : +- Filter (75) + : : +- Scan parquet (74) + : +- Filter (91) + : +- HashAggregate (90) + : +- Exchange (89) + : +- HashAggregate (88) + : +- BroadcastHashJoin LeftSemi BuildRight (87) + : :- Project (82) + : : +- Filter (81) + : : +- Scan parquet (80) + : +- BroadcastExchange (86) + : +- Project (85) + : +- Filter (84) + : +- Scan parquet (83) + +- BroadcastExchange (102) + +- Project (101) + +- Filter (100) + +- Scan parquet (99) (1) Scan parquet @@ -386,164 +389,176 @@ Condition : isnotnull(s_nationkey#X) Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(71) Scan parquet +(71) Sort +Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(72) Scan parquet Output [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(ps_availqty), IsNotNull(ps_partkey), IsNotNull(ps_suppkey)] ReadSchema: struct -(72) Filter +(73) Filter Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Condition : ((isnotnull(ps_availqty#X) AND isnotnull(ps_partkey#X)) AND isnotnull(ps_suppkey#X)) -(73) Scan parquet +(74) Scan parquet Output [2]: [p_partkey#X, p_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_name), StringStartsWith(p_name,forest)] ReadSchema: struct -(74) Filter +(75) Filter Input [2]: [p_partkey#X, p_name#X] Condition : (isnotnull(p_name#X) AND StartsWith(p_name#X, forest)) -(75) Project +(76) Project Output [1]: [p_partkey#X] Input [2]: [p_partkey#X, p_name#X] -(76) BroadcastExchange +(77) BroadcastExchange Input [1]: [p_partkey#X] Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=X] -(77) BroadcastHashJoin +(78) BroadcastHashJoin Left keys [1]: [ps_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(78) BroadcastExchange +(79) BroadcastExchange Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false], input[1, bigint, false]),false), [plan_id=X] -(79) Scan parquet +(80) Scan parquet Output [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1994-01-01), LessThan(l_shipdate,1995-01-01), IsNotNull(l_partkey), IsNotNull(l_suppkey)] ReadSchema: struct -(80) Filter +(81) Filter Input [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] Condition : ((((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1994-01-01)) AND (l_shipdate#X < 1995-01-01)) AND isnotnull(l_partkey#X)) AND isnotnull(l_suppkey#X)) -(81) Project +(82) Project Output [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Input [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] -(82) Scan parquet +(83) Scan parquet Output [2]: [p_partkey#X, p_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_name), StringStartsWith(p_name,forest)] ReadSchema: struct -(83) Filter +(84) Filter Input [2]: [p_partkey#X, p_name#X] Condition : (isnotnull(p_name#X) AND StartsWith(p_name#X, forest)) -(84) Project +(85) Project Output [1]: [p_partkey#X] Input [2]: [p_partkey#X, p_name#X] -(85) BroadcastExchange +(86) BroadcastExchange Input [1]: [p_partkey#X] Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=X] -(86) BroadcastHashJoin +(87) BroadcastHashJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(87) HashAggregate +(88) HashAggregate Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Keys [2]: [l_partkey#X, l_suppkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] -(88) Exchange +(89) Exchange Input [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_partkey#X, l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(89) HashAggregate +(90) HashAggregate Input [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] Keys [2]: [l_partkey#X, l_suppkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [3]: [CheckOverflow((0.50 * promote_precision(sum(l_quantity#X)#X)), DecimalType(24,3)) AS (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(90) Filter +(91) Filter Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Condition : isnotnull((0.5 * sum(l_quantity))#X) -(91) BroadcastHashJoin +(92) BroadcastHashJoin Left keys [2]: [ps_partkey#X, ps_suppkey#X] Right keys [2]: [l_partkey#X, l_suppkey#X] Join condition: (cast(ps_availqty#X as decimal(24,3)) > (0.5 * sum(l_quantity))#X) -(92) Project +(93) Project Output [1]: [ps_suppkey#X] Input [6]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(93) Exchange +(94) Exchange Input [1]: [ps_suppkey#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(94) ShuffledHashJoin +(95) Sort +Input [1]: [ps_suppkey#X] +Arguments: [ps_suppkey#X ASC NULLS FIRST], false, 0 + +(96) SortMergeJoin Left keys [1]: [s_suppkey#X] Right keys [1]: [ps_suppkey#X] Join condition: None -(95) Project +(97) ShuffledHashJoinExecTemp +Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] +Arguments: BuildRight + +(98) Project Output [3]: [s_name#X, s_address#X, s_nationkey#X] Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] -(96) Scan parquet +(99) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,CANADA), IsNotNull(n_nationkey)] ReadSchema: struct -(97) Filter +(100) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = CANADA)) AND isnotnull(n_nationkey#X)) -(98) Project +(101) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(99) BroadcastExchange +(102) BroadcastExchange Input [1]: [n_nationkey#X] Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=X] -(100) BroadcastHashJoin +(103) BroadcastHashJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(101) Project +(104) Project Output [2]: [s_name#X, s_address#X] Input [4]: [s_name#X, s_address#X, s_nationkey#X, n_nationkey#X] -(102) Exchange +(105) Exchange Input [2]: [s_name#X, s_address#X] Arguments: rangepartitioning(s_name#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(103) Sort +(106) Sort Input [2]: [s_name#X, s_address#X] Arguments: [s_name#X ASC NULLS FIRST], true, 0 -(104) AdaptiveSparkPlan +(107) AdaptiveSparkPlan Output [2]: [s_name#X, s_address#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/10.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/10.txt index 313572f951ad..5cb6baf0fea2 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/10.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/10.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (87) +AdaptiveSparkPlan (96) +- == Final Plan == VeloxColumnarToRowExec (60) +- TakeOrderedAndProjectExecTransformer (59) @@ -47,32 +47,41 @@ AdaptiveSparkPlan (87) +- ^ FilterExecTransformer (40) +- ^ Scan parquet (39) +- == Initial Plan == - TakeOrderedAndProject (86) - +- HashAggregate (85) - +- Exchange (84) - +- HashAggregate (83) - +- Project (82) - +- ShuffledHashJoin Inner BuildRight (81) - :- Exchange (77) - : +- Project (76) - : +- ShuffledHashJoin Inner BuildRight (75) - : :- Exchange (70) - : : +- Project (69) - : : +- ShuffledHashJoin Inner BuildRight (68) - : : :- Exchange (63) - : : : +- Filter (62) - : : : +- Scan parquet (61) - : : +- Exchange (67) - : : +- Project (66) - : : +- Filter (65) - : : +- Scan parquet (64) - : +- Exchange (74) - : +- Project (73) - : +- Filter (72) - : +- Scan parquet (71) - +- Exchange (80) - +- Filter (79) - +- Scan parquet (78) + TakeOrderedAndProject (95) + +- HashAggregate (94) + +- Exchange (93) + +- HashAggregate (92) + +- Project (91) + +- ShuffledHashJoinExecTemp (90) + +- SortMergeJoin Inner (89) + :- Sort (84) + : +- Exchange (83) + : +- Project (82) + : +- ShuffledHashJoinExecTemp (81) + : +- SortMergeJoin Inner (80) + : :- Sort (74) + : : +- Exchange (73) + : : +- Project (72) + : : +- ShuffledHashJoinExecTemp (71) + : : +- SortMergeJoin Inner (70) + : : :- Sort (64) + : : : +- Exchange (63) + : : : +- Filter (62) + : : : +- Scan parquet (61) + : : +- Sort (69) + : : +- Exchange (68) + : : +- Project (67) + : : +- Filter (66) + : : +- Scan parquet (65) + : +- Sort (79) + : +- Exchange (78) + : +- Project (77) + : +- Filter (76) + : +- Scan parquet (75) + +- Sort (88) + +- Exchange (87) + +- Filter (86) + +- Scan parquet (85) (1) Scan parquet @@ -336,116 +345,152 @@ Condition : (isnotnull(c_custkey#X) AND isnotnull(c_nationkey#X)) Input [7]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(64) Scan parquet +(64) Sort +Input [7]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(65) Scan parquet Output [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderdate), GreaterThanOrEqual(o_orderdate,1993-10-01), LessThan(o_orderdate,1994-01-01), IsNotNull(o_custkey), IsNotNull(o_orderkey)] ReadSchema: struct -(65) Filter +(66) Filter Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Condition : ((((isnotnull(o_orderdate#X) AND (o_orderdate#X >= 1993-10-01)) AND (o_orderdate#X < 1994-01-01)) AND isnotnull(o_custkey#X)) AND isnotnull(o_orderkey#X)) -(66) Project +(67) Project Output [2]: [o_orderkey#X, o_custkey#X] Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] -(67) Exchange +(68) Exchange Input [2]: [o_orderkey#X, o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(68) ShuffledHashJoin +(69) Sort +Input [2]: [o_orderkey#X, o_custkey#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(70) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join condition: None -(69) Project +(71) ShuffledHashJoinExecTemp +Input [9]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X, o_custkey#X] +Arguments: BuildRight + +(72) Project Output [8]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X] Input [9]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X, o_custkey#X] -(70) Exchange +(73) Exchange Input [8]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(71) Scan parquet +(74) Sort +Input [8]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(75) Scan parquet Output [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_returnflag#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_returnflag), EqualTo(l_returnflag,R), IsNotNull(l_orderkey)] ReadSchema: struct -(72) Filter +(76) Filter Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_returnflag#X] Condition : ((isnotnull(l_returnflag#X) AND (l_returnflag#X = R)) AND isnotnull(l_orderkey#X)) -(73) Project +(77) Project Output [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_returnflag#X] -(74) Exchange +(78) Exchange Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(75) ShuffledHashJoin +(79) Sort +Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(80) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(76) Project +(81) ShuffledHashJoinExecTemp +Input [11]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X] +Arguments: BuildRight + +(82) Project Output [9]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X] Input [11]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X] -(77) Exchange +(83) Exchange Input [9]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(c_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(78) Scan parquet +(84) Sort +Input [9]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X] +Arguments: [c_nationkey#X ASC NULLS FIRST], false, 0 + +(85) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey)] ReadSchema: struct -(79) Filter +(86) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : isnotnull(n_nationkey#X) -(80) Exchange +(87) Exchange Input [2]: [n_nationkey#X, n_name#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(81) ShuffledHashJoin +(88) Sort +Input [2]: [n_nationkey#X, n_name#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(89) SortMergeJoin Left keys [1]: [c_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(82) Project +(90) ShuffledHashJoinExecTemp +Input [11]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X, n_nationkey#X, n_name#X] +Arguments: BuildRight + +(91) Project Output [9]: [c_custkey#X, c_name#X, c_address#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X, n_name#X] Input [11]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X, n_nationkey#X, n_name#X] -(83) HashAggregate +(92) HashAggregate Input [9]: [c_custkey#X, c_name#X, c_address#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X, n_name#X] Keys [7]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [9]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, sum#X, isEmpty#X] -(84) Exchange +(93) Exchange Input [9]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, sum#X, isEmpty#X] Arguments: hashpartitioning(c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(85) HashAggregate +(94) HashAggregate Input [9]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, sum#X, isEmpty#X] Keys [7]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X] Results [8]: [c_custkey#X, c_name#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X AS revenue#X, c_acctbal#X, n_name#X, c_address#X, c_phone#X, c_comment#X] -(86) TakeOrderedAndProject +(95) TakeOrderedAndProject Input [8]: [c_custkey#X, c_name#X, revenue#X, c_acctbal#X, n_name#X, c_address#X, c_phone#X, c_comment#X] Arguments: X, [revenue#X DESC NULLS LAST], [c_custkey#X, c_name#X, revenue#X, c_acctbal#X, n_name#X, c_address#X, c_phone#X, c_comment#X] -(87) AdaptiveSparkPlan +(96) AdaptiveSparkPlan Output [8]: [c_custkey#X, c_name#X, revenue#X, c_acctbal#X, n_name#X, c_address#X, c_phone#X, c_comment#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/11.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/11.txt index e91064e3580f..15582f2e052a 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/11.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/11.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (72) +AdaptiveSparkPlan (78) +- == Final Plan == VeloxColumnarToRowExec (50) +- ^ SortExecTransformer (48) @@ -39,27 +39,33 @@ AdaptiveSparkPlan (72) +- ^ FilterExecTransformer (25) +- ^ Scan parquet (24) +- == Initial Plan == - Sort (71) - +- Exchange (70) - +- Filter (69) - +- HashAggregate (68) - +- Exchange (67) - +- HashAggregate (66) - +- Project (65) - +- ShuffledHashJoin Inner BuildRight (64) - :- Exchange (59) - : +- Project (58) - : +- ShuffledHashJoin Inner BuildRight (57) - : :- Exchange (53) - : : +- Filter (52) - : : +- Scan parquet (51) - : +- Exchange (56) - : +- Filter (55) - : +- Scan parquet (54) - +- Exchange (63) - +- Project (62) - +- Filter (61) - +- Scan parquet (60) + Sort (77) + +- Exchange (76) + +- Filter (75) + +- HashAggregate (74) + +- Exchange (73) + +- HashAggregate (72) + +- Project (71) + +- ShuffledHashJoinExecTemp (70) + +- SortMergeJoin Inner (69) + :- Sort (63) + : +- Exchange (62) + : +- Project (61) + : +- ShuffledHashJoinExecTemp (60) + : +- SortMergeJoin Inner (59) + : :- Sort (54) + : : +- Exchange (53) + : : +- Filter (52) + : : +- Scan parquet (51) + : +- Sort (58) + : +- Exchange (57) + : +- Filter (56) + : +- Scan parquet (55) + +- Sort (68) + +- Exchange (67) + +- Project (66) + +- Filter (65) + +- Scan parquet (64) (1) Scan parquet @@ -281,92 +287,116 @@ Condition : isnotnull(ps_suppkey#X) Input [4]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(54) Scan parquet +(54) Sort +Input [4]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] +Arguments: [ps_suppkey#X ASC NULLS FIRST], false, 0 + +(55) Scan parquet Output [2]: [s_suppkey#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct -(55) Filter +(56) Filter Input [2]: [s_suppkey#X, s_nationkey#X] Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) -(56) Exchange +(57) Exchange Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(57) ShuffledHashJoin +(58) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(59) SortMergeJoin Left keys [1]: [ps_suppkey#X] Right keys [1]: [s_suppkey#X] Join condition: None -(58) Project +(60) ShuffledHashJoinExecTemp +Input [6]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X, s_suppkey#X, s_nationkey#X] +Arguments: BuildRight + +(61) Project Output [4]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] Input [6]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X, s_suppkey#X, s_nationkey#X] -(59) Exchange +(62) Exchange Input [4]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(60) Scan parquet +(63) Sort +Input [4]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(64) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,GERMANY), IsNotNull(n_nationkey)] ReadSchema: struct -(61) Filter +(65) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = GERMANY)) AND isnotnull(n_nationkey#X)) -(62) Project +(66) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(63) Exchange +(67) Exchange Input [1]: [n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(64) ShuffledHashJoin +(68) Sort +Input [1]: [n_nationkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(69) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(65) Project +(70) ShuffledHashJoinExecTemp +Input [5]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X, n_nationkey#X] +Arguments: BuildRight + +(71) Project Output [3]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X] Input [5]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X, n_nationkey#X] -(66) HashAggregate +(72) HashAggregate Input [3]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X] Keys [1]: [ps_partkey#X] Functions [1]: [partial_sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(cast(ps_availqty#X as decimal(10,0)) as decimal(12,2)))), DecimalType(23,2), true))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [ps_partkey#X, sum#X, isEmpty#X] -(67) Exchange +(73) Exchange Input [3]: [ps_partkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(ps_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(68) HashAggregate +(74) HashAggregate Input [3]: [ps_partkey#X, sum#X, isEmpty#X] Keys [1]: [ps_partkey#X] Functions [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(cast(ps_availqty#X as decimal(10,0)) as decimal(12,2)))), DecimalType(23,2), true))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(cast(ps_availqty#X as decimal(10,0)) as decimal(12,2)))), DecimalType(23,2), true))#X] Results [2]: [ps_partkey#X, sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(cast(ps_availqty#X as decimal(10,0)) as decimal(12,2)))), DecimalType(23,2), true))#X AS value#X] -(69) Filter +(75) Filter Input [2]: [ps_partkey#X, value#X] Condition : (isnotnull(value#X) AND (cast(value#X as decimal(38,6)) > Subquery subquery#X, [id=#X])) -(70) Exchange +(76) Exchange Input [2]: [ps_partkey#X, value#X] Arguments: rangepartitioning(value#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(71) Sort +(77) Sort Input [2]: [ps_partkey#X, value#X] Arguments: [value#X DESC NULLS LAST], true, 0 -(72) AdaptiveSparkPlan +(78) AdaptiveSparkPlan Output [2]: [ps_partkey#X, value#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/12.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/12.txt index 8bad82473a58..be0b8425b17b 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/12.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/12.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (49) +AdaptiveSparkPlan (52) +- == Final Plan == VeloxColumnarToRowExec (34) +- ^ SortExecTransformer (32) @@ -27,20 +27,23 @@ AdaptiveSparkPlan (49) +- ^ FilterExecTransformer (10) +- ^ Scan parquet (9) +- == Initial Plan == - Sort (48) - +- Exchange (47) - +- HashAggregate (46) - +- Exchange (45) - +- HashAggregate (44) - +- Project (43) - +- ShuffledHashJoin Inner BuildLeft (42) - :- Exchange (37) - : +- Filter (36) - : +- Scan parquet (35) - +- Exchange (41) - +- Project (40) - +- Filter (39) - +- Scan parquet (38) + Sort (51) + +- Exchange (50) + +- HashAggregate (49) + +- Exchange (48) + +- HashAggregate (47) + +- Project (46) + +- ShuffledHashJoinExecTemp (45) + +- SortMergeJoin Inner (44) + :- Sort (38) + : +- Exchange (37) + : +- Filter (36) + : +- Scan parquet (35) + +- Sort (43) + +- Exchange (42) + +- Project (41) + +- Filter (40) + +- Scan parquet (39) (1) Scan parquet @@ -198,60 +201,72 @@ Condition : isnotnull(o_orderkey#X) Input [2]: [o_orderkey#X, o_orderpriority#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(38) Scan parquet +(38) Sort +Input [2]: [o_orderkey#X, o_orderpriority#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(39) Scan parquet Output [5]: [l_orderkey#X, l_shipdate#X, l_commitdate#X, l_receiptdate#X, l_shipmode#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_commitdate), IsNotNull(l_receiptdate), IsNotNull(l_shipdate), In(l_shipmode, [MAIL,SHIP]), GreaterThanOrEqual(l_receiptdate,1994-01-01), LessThan(l_receiptdate,1995-01-01), IsNotNull(l_orderkey)] ReadSchema: struct -(39) Filter +(40) Filter Input [5]: [l_orderkey#X, l_shipdate#X, l_commitdate#X, l_receiptdate#X, l_shipmode#X] Condition : ((((((((isnotnull(l_commitdate#X) AND isnotnull(l_receiptdate#X)) AND isnotnull(l_shipdate#X)) AND l_shipmode#X IN (MAIL,SHIP)) AND (l_commitdate#X < l_receiptdate#X)) AND (l_shipdate#X < l_commitdate#X)) AND (l_receiptdate#X >= 1994-01-01)) AND (l_receiptdate#X < 1995-01-01)) AND isnotnull(l_orderkey#X)) -(40) Project +(41) Project Output [2]: [l_orderkey#X, l_shipmode#X] Input [5]: [l_orderkey#X, l_shipdate#X, l_commitdate#X, l_receiptdate#X, l_shipmode#X] -(41) Exchange +(42) Exchange Input [2]: [l_orderkey#X, l_shipmode#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(42) ShuffledHashJoin +(43) Sort +Input [2]: [l_orderkey#X, l_shipmode#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(44) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(43) Project +(45) ShuffledHashJoinExecTemp +Input [4]: [o_orderkey#X, o_orderpriority#X, l_orderkey#X, l_shipmode#X] +Arguments: BuildLeft + +(46) Project Output [2]: [o_orderpriority#X, l_shipmode#X] Input [4]: [o_orderkey#X, o_orderpriority#X, l_orderkey#X, l_shipmode#X] -(44) HashAggregate +(47) HashAggregate Input [2]: [o_orderpriority#X, l_shipmode#X] Keys [1]: [l_shipmode#X] Functions [2]: [partial_sum(CASE WHEN ((o_orderpriority#X = 1-URGENT) OR (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (NOT (o_orderpriority#X = 1-URGENT) AND NOT (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)] Aggregate Attributes [2]: [sum#X, sum#X] Results [3]: [l_shipmode#X, sum#X, sum#X] -(45) Exchange +(48) Exchange Input [3]: [l_shipmode#X, sum#X, sum#X] Arguments: hashpartitioning(l_shipmode#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(46) HashAggregate +(49) HashAggregate Input [3]: [l_shipmode#X, sum#X, sum#X] Keys [1]: [l_shipmode#X] Functions [2]: [sum(CASE WHEN ((o_orderpriority#X = 1-URGENT) OR (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END), sum(CASE WHEN (NOT (o_orderpriority#X = 1-URGENT) AND NOT (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)] Aggregate Attributes [2]: [sum(CASE WHEN ((o_orderpriority#X = 1-URGENT) OR (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)#X, sum(CASE WHEN (NOT (o_orderpriority#X = 1-URGENT) AND NOT (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)#X] Results [3]: [l_shipmode#X, sum(CASE WHEN ((o_orderpriority#X = 1-URGENT) OR (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)#X AS high_line_count#X, sum(CASE WHEN (NOT (o_orderpriority#X = 1-URGENT) AND NOT (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)#X AS low_line_count#X] -(47) Exchange +(50) Exchange Input [3]: [l_shipmode#X, high_line_count#X, low_line_count#X] Arguments: rangepartitioning(l_shipmode#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(48) Sort +(51) Sort Input [3]: [l_shipmode#X, high_line_count#X, low_line_count#X] Arguments: [l_shipmode#X ASC NULLS FIRST], true, 0 -(49) AdaptiveSparkPlan +(52) AdaptiveSparkPlan Output [3]: [l_shipmode#X, high_line_count#X, low_line_count#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/13.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/13.txt index 3268390701fc..1ec70c5928f0 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/13.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/13.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (52) +AdaptiveSparkPlan (55) +- == Final Plan == VeloxColumnarToRowExec (36) +- ^ SortExecTransformer (34) @@ -29,21 +29,24 @@ AdaptiveSparkPlan (52) +- ^ FilterExecTransformer (9) +- ^ Scan parquet (8) +- == Initial Plan == - Sort (51) - +- Exchange (50) - +- HashAggregate (49) - +- Exchange (48) - +- HashAggregate (47) - +- HashAggregate (46) - +- HashAggregate (45) - +- Project (44) - +- ShuffledHashJoin LeftOuter BuildRight (43) - :- Exchange (38) - : +- Scan parquet (37) - +- Exchange (42) - +- Project (41) - +- Filter (40) - +- Scan parquet (39) + Sort (54) + +- Exchange (53) + +- HashAggregate (52) + +- Exchange (51) + +- HashAggregate (50) + +- HashAggregate (49) + +- HashAggregate (48) + +- Project (47) + +- ShuffledHashJoinExecTemp (46) + +- SortMergeJoin LeftOuter (45) + :- Sort (39) + : +- Exchange (38) + : +- Scan parquet (37) + +- Sort (44) + +- Exchange (43) + +- Project (42) + +- Filter (41) + +- Scan parquet (40) (1) Scan parquet @@ -209,74 +212,86 @@ ReadSchema: struct Input [1]: [c_custkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(39) Scan parquet +(39) Sort +Input [1]: [c_custkey#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(40) Scan parquet Output [3]: [o_orderkey#X, o_custkey#X, o_comment#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_comment), IsNotNull(o_custkey)] ReadSchema: struct -(40) Filter +(41) Filter Input [3]: [o_orderkey#X, o_custkey#X, o_comment#X] Condition : ((isnotnull(o_comment#X) AND NOT o_comment#X LIKE %special%requests%) AND isnotnull(o_custkey#X)) -(41) Project +(42) Project Output [2]: [o_orderkey#X, o_custkey#X] Input [3]: [o_orderkey#X, o_custkey#X, o_comment#X] -(42) Exchange +(43) Exchange Input [2]: [o_orderkey#X, o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(43) ShuffledHashJoin +(44) Sort +Input [2]: [o_orderkey#X, o_custkey#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(45) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join condition: None -(44) Project +(46) ShuffledHashJoinExecTemp +Input [3]: [c_custkey#X, o_orderkey#X, o_custkey#X] +Arguments: BuildLeft + +(47) Project Output [2]: [c_custkey#X, o_orderkey#X] Input [3]: [c_custkey#X, o_orderkey#X, o_custkey#X] -(45) HashAggregate +(48) HashAggregate Input [2]: [c_custkey#X, o_orderkey#X] Keys [1]: [c_custkey#X] Functions [1]: [partial_count(o_orderkey#X)] Aggregate Attributes [1]: [count#X] Results [2]: [c_custkey#X, count#X] -(46) HashAggregate +(49) HashAggregate Input [2]: [c_custkey#X, count#X] Keys [1]: [c_custkey#X] Functions [1]: [count(o_orderkey#X)] Aggregate Attributes [1]: [count(o_orderkey#X)#X] Results [1]: [count(o_orderkey#X)#X AS c_count#X] -(47) HashAggregate +(50) HashAggregate Input [1]: [c_count#X] Keys [1]: [c_count#X] Functions [1]: [partial_count(1)] Aggregate Attributes [1]: [count#X] Results [2]: [c_count#X, count#X] -(48) Exchange +(51) Exchange Input [2]: [c_count#X, count#X] Arguments: hashpartitioning(c_count#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(49) HashAggregate +(52) HashAggregate Input [2]: [c_count#X, count#X] Keys [1]: [c_count#X] Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#X] Results [2]: [c_count#X, count(1)#X AS custdist#X] -(50) Exchange +(53) Exchange Input [2]: [c_count#X, custdist#X] Arguments: rangepartitioning(custdist#X DESC NULLS LAST, c_count#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(51) Sort +(54) Sort Input [2]: [c_count#X, custdist#X] Arguments: [custdist#X DESC NULLS LAST, c_count#X DESC NULLS LAST], true, 0 -(52) AdaptiveSparkPlan +(55) AdaptiveSparkPlan Output [2]: [c_count#X, custdist#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/14.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/14.txt index dd4a53cfa822..13a34e7db6fd 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/14.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/14.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (35) +AdaptiveSparkPlan (38) +- == Final Plan == VeloxColumnarToRowExec (23) +- ^ ProjectExecTransformer (21) @@ -20,17 +20,20 @@ AdaptiveSparkPlan (35) +- ^ FilterExecTransformer (10) +- ^ Scan parquet (9) +- == Initial Plan == - HashAggregate (34) - +- HashAggregate (33) - +- Project (32) - +- ShuffledHashJoin Inner BuildRight (31) - :- Exchange (27) - : +- Project (26) - : +- Filter (25) - : +- Scan parquet (24) - +- Exchange (30) - +- Filter (29) - +- Scan parquet (28) + HashAggregate (37) + +- HashAggregate (36) + +- Project (35) + +- ShuffledHashJoinExecTemp (34) + +- SortMergeJoin Inner (33) + :- Sort (28) + : +- Exchange (27) + : +- Project (26) + : +- Filter (25) + : +- Scan parquet (24) + +- Sort (32) + +- Exchange (31) + +- Filter (30) + +- Scan parquet (29) (1) Scan parquet @@ -152,44 +155,56 @@ Input [4]: [l_partkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Input [3]: [l_partkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(28) Scan parquet +(28) Sort +Input [3]: [l_partkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(29) Scan parquet Output [2]: [p_partkey#X, p_type#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_partkey)] ReadSchema: struct -(29) Filter +(30) Filter Input [2]: [p_partkey#X, p_type#X] Condition : isnotnull(p_partkey#X) -(30) Exchange +(31) Exchange Input [2]: [p_partkey#X, p_type#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(31) ShuffledHashJoin +(32) Sort +Input [2]: [p_partkey#X, p_type#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(33) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(32) Project +(34) ShuffledHashJoinExecTemp +Input [5]: [l_partkey#X, l_extendedprice#X, l_discount#X, p_partkey#X, p_type#X] +Arguments: BuildRight + +(35) Project Output [3]: [l_extendedprice#X, l_discount#X, p_type#X] Input [5]: [l_partkey#X, l_extendedprice#X, l_discount#X, p_partkey#X, p_type#X] -(33) HashAggregate +(36) HashAggregate Input [3]: [l_extendedprice#X, l_discount#X, p_type#X] Keys: [] Functions [2]: [partial_sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true) ELSE 0.0000 END), partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] Aggregate Attributes [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] Results [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] -(34) HashAggregate +(37) HashAggregate Input [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] Keys: [] Functions [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true) ELSE 0.0000 END), sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] Aggregate Attributes [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true) ELSE 0.0000 END)#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X] Results [1]: [CheckOverflow((promote_precision(CheckOverflow((100.0000 * promote_precision(sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true) ELSE 0.0000 END)#X)), DecimalType(38,6), true)) / promote_precision(cast(sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X as decimal(38,6)))), DecimalType(38,6), true) AS promo_revenue#X] -(35) AdaptiveSparkPlan +(38) AdaptiveSparkPlan Output [1]: [promo_revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/15.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/15.txt index c456274bfeaa..d3e4d0ab8781 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/15.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/15.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (45) +AdaptiveSparkPlan (48) +- == Final Plan == VeloxColumnarToRowExec (30) +- ^ SortExecTransformer (28) @@ -25,20 +25,23 @@ AdaptiveSparkPlan (45) +- ^ FilterExecTransformer (10) +- ^ Scan parquet (9) +- == Initial Plan == - Sort (44) - +- Exchange (43) - +- Project (42) - +- ShuffledHashJoin Inner BuildLeft (41) - :- Exchange (33) - : +- Filter (32) - : +- Scan parquet (31) - +- Filter (40) - +- HashAggregate (39) - +- Exchange (38) - +- HashAggregate (37) - +- Project (36) - +- Filter (35) - +- Scan parquet (34) + Sort (47) + +- Exchange (46) + +- Project (45) + +- ShuffledHashJoinExecTemp (44) + +- SortMergeJoin Inner (43) + :- Sort (34) + : +- Exchange (33) + : +- Filter (32) + : +- Scan parquet (31) + +- Sort (42) + +- Filter (41) + +- HashAggregate (40) + +- Exchange (39) + +- HashAggregate (38) + +- Project (37) + +- Filter (36) + +- Scan parquet (35) (1) Scan parquet @@ -182,60 +185,72 @@ Condition : isnotnull(s_suppkey#X) Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(34) Scan parquet +(34) Sort +Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(35) Scan parquet Output [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1996-01-01), LessThan(l_shipdate,1996-04-01), IsNotNull(l_suppkey)] ReadSchema: struct -(35) Filter +(36) Filter Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : (((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1996-01-01)) AND (l_shipdate#X < 1996-04-01)) AND isnotnull(l_suppkey#X)) -(36) Project +(37) Project Output [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(37) HashAggregate +(38) HashAggregate Input [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] Keys [1]: [l_suppkey#X] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_suppkey#X, sum#X, isEmpty#X] -(38) Exchange +(39) Exchange Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(39) HashAggregate +(40) HashAggregate Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Keys [1]: [l_suppkey#X] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X] Results [2]: [l_suppkey#X AS supplier_no#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X AS total_revenue#X] -(40) Filter +(41) Filter Input [2]: [supplier_no#X, total_revenue#X] Condition : (isnotnull(total_revenue#X) AND (total_revenue#X = Subquery subquery#X, [id=#X])) -(41) ShuffledHashJoin +(42) Sort +Input [2]: [supplier_no#X, total_revenue#X] +Arguments: [supplier_no#X ASC NULLS FIRST], false, 0 + +(43) SortMergeJoin Left keys [1]: [s_suppkey#X] Right keys [1]: [supplier_no#X] Join condition: None -(42) Project +(44) ShuffledHashJoinExecTemp +Input [6]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, supplier_no#X, total_revenue#X] +Arguments: BuildLeft + +(45) Project Output [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] Input [6]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, supplier_no#X, total_revenue#X] -(43) Exchange +(46) Exchange Input [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] Arguments: rangepartitioning(s_suppkey#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(44) Sort +(47) Sort Input [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] Arguments: [s_suppkey#X ASC NULLS FIRST], true, 0 -(45) AdaptiveSparkPlan +(48) AdaptiveSparkPlan Output [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/16.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/16.txt index 7bbe4249fdcd..573bcb77c651 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/16.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/16.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (64) +AdaptiveSparkPlan (67) +- == Final Plan == VeloxColumnarToRowExec (42) +- ^ SortExecTransformer (40) @@ -33,27 +33,30 @@ AdaptiveSparkPlan (64) +- ^ FilterExecTransformer (10) +- ^ Scan parquet (9) +- == Initial Plan == - Sort (63) - +- Exchange (62) - +- HashAggregate (61) - +- Exchange (60) - +- HashAggregate (59) - +- HashAggregate (58) - +- Exchange (57) - +- HashAggregate (56) - +- Project (55) - +- ShuffledHashJoin Inner BuildRight (54) - :- Exchange (50) - : +- BroadcastHashJoin LeftAnti BuildRight (49) - : :- Filter (44) - : : +- Scan parquet (43) - : +- BroadcastExchange (48) - : +- Project (47) - : +- Filter (46) - : +- Scan parquet (45) - +- Exchange (53) - +- Filter (52) - +- Scan parquet (51) + Sort (66) + +- Exchange (65) + +- HashAggregate (64) + +- Exchange (63) + +- HashAggregate (62) + +- HashAggregate (61) + +- Exchange (60) + +- HashAggregate (59) + +- Project (58) + +- ShuffledHashJoinExecTemp (57) + +- SortMergeJoin Inner (56) + :- Sort (51) + : +- Exchange (50) + : +- BroadcastHashJoin LeftAnti BuildRight (49) + : :- Filter (44) + : : +- Scan parquet (43) + : +- BroadcastExchange (48) + : +- Project (47) + : +- Filter (46) + : +- Scan parquet (45) + +- Sort (55) + +- Exchange (54) + +- Filter (53) + +- Scan parquet (52) (1) Scan parquet @@ -271,74 +274,86 @@ Join condition: None Input [2]: [ps_partkey#X, ps_suppkey#X] Arguments: hashpartitioning(ps_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(51) Scan parquet +(51) Sort +Input [2]: [ps_partkey#X, ps_suppkey#X] +Arguments: [ps_partkey#X ASC NULLS FIRST], false, 0 + +(52) Scan parquet Output [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_brand), IsNotNull(p_type), Not(EqualTo(p_brand,Brand#X)), Not(StringStartsWith(p_type,MEDIUM POLISHED)), In(p_size, [14,19,23,3,36,45,49,9]), IsNotNull(p_partkey)] ReadSchema: struct -(52) Filter +(53) Filter Input [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] Condition : (((((isnotnull(p_brand#X) AND isnotnull(p_type#X)) AND NOT (p_brand#X = Brand#X)) AND NOT StartsWith(p_type#X, MEDIUM POLISHED)) AND p_size#X IN (49,14,23,45,19,3,36,9)) AND isnotnull(p_partkey#X)) -(53) Exchange +(54) Exchange Input [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(54) ShuffledHashJoin +(55) Sort +Input [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(56) SortMergeJoin Left keys [1]: [ps_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(55) Project +(57) ShuffledHashJoinExecTemp +Input [6]: [ps_partkey#X, ps_suppkey#X, p_partkey#X, p_brand#X, p_type#X, p_size#X] +Arguments: BuildRight + +(58) Project Output [4]: [ps_suppkey#X, p_brand#X, p_type#X, p_size#X] Input [6]: [ps_partkey#X, ps_suppkey#X, p_partkey#X, p_brand#X, p_type#X, p_size#X] -(56) HashAggregate +(59) HashAggregate Input [4]: [ps_suppkey#X, p_brand#X, p_type#X, p_size#X] Keys [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] Functions: [] Aggregate Attributes: [] Results [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] -(57) Exchange +(60) Exchange Input [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] Arguments: hashpartitioning(p_brand#X, p_type#X, p_size#X, ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(58) HashAggregate +(61) HashAggregate Input [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] Keys [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] Functions: [] Aggregate Attributes: [] Results [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] -(59) HashAggregate +(62) HashAggregate Input [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] Keys [3]: [p_brand#X, p_type#X, p_size#X] Functions [1]: [partial_count(distinct ps_suppkey#X)] Aggregate Attributes [1]: [count(ps_suppkey#X)#X] Results [4]: [p_brand#X, p_type#X, p_size#X, count#X] -(60) Exchange +(63) Exchange Input [4]: [p_brand#X, p_type#X, p_size#X, count#X] Arguments: hashpartitioning(p_brand#X, p_type#X, p_size#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(61) HashAggregate +(64) HashAggregate Input [4]: [p_brand#X, p_type#X, p_size#X, count#X] Keys [3]: [p_brand#X, p_type#X, p_size#X] Functions [1]: [count(distinct ps_suppkey#X)] Aggregate Attributes [1]: [count(ps_suppkey#X)#X] Results [4]: [p_brand#X, p_type#X, p_size#X, count(ps_suppkey#X)#X AS supplier_cnt#X] -(62) Exchange +(65) Exchange Input [4]: [p_brand#X, p_type#X, p_size#X, supplier_cnt#X] Arguments: rangepartitioning(supplier_cnt#X DESC NULLS LAST, p_brand#X ASC NULLS FIRST, p_type#X ASC NULLS FIRST, p_size#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(63) Sort +(66) Sort Input [4]: [p_brand#X, p_type#X, p_size#X, supplier_cnt#X] Arguments: [supplier_cnt#X DESC NULLS LAST, p_brand#X ASC NULLS FIRST, p_type#X ASC NULLS FIRST, p_size#X ASC NULLS FIRST], true, 0 -(64) AdaptiveSparkPlan +(67) AdaptiveSparkPlan Output [4]: [p_brand#X, p_type#X, p_size#X, supplier_cnt#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/17.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/17.txt index 248f638db9ad..72dbb75b3d9e 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/17.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/17.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (57) +AdaptiveSparkPlan (63) +- == Final Plan == VeloxColumnarToRowExec (37) +- ^ ProjectExecTransformer (35) @@ -32,25 +32,31 @@ AdaptiveSparkPlan (57) +- ^ FilterExecTransformer (20) +- ^ Scan parquet (19) +- == Initial Plan == - HashAggregate (56) - +- HashAggregate (55) - +- Project (54) - +- ShuffledHashJoin Inner BuildRight (53) - :- Project (46) - : +- ShuffledHashJoin Inner BuildRight (45) - : :- Exchange (40) - : : +- Filter (39) - : : +- Scan parquet (38) - : +- Exchange (44) - : +- Project (43) - : +- Filter (42) - : +- Scan parquet (41) - +- Filter (52) - +- HashAggregate (51) - +- Exchange (50) - +- HashAggregate (49) - +- Filter (48) - +- Scan parquet (47) + HashAggregate (62) + +- HashAggregate (61) + +- Project (60) + +- ShuffledHashJoinExecTemp (59) + +- SortMergeJoin Inner (58) + :- Sort (50) + : +- Project (49) + : +- ShuffledHashJoinExecTemp (48) + : +- SortMergeJoin Inner (47) + : :- Sort (41) + : : +- Exchange (40) + : : +- Filter (39) + : : +- Scan parquet (38) + : +- Sort (46) + : +- Exchange (45) + : +- Project (44) + : +- Filter (43) + : +- Scan parquet (42) + +- Sort (57) + +- Filter (56) + +- HashAggregate (55) + +- Exchange (54) + +- HashAggregate (53) + +- Filter (52) + +- Scan parquet (51) (1) Scan parquet @@ -232,90 +238,114 @@ Condition : (isnotnull(l_partkey#X) AND isnotnull(l_quantity#X)) Input [3]: [l_partkey#X, l_quantity#X, l_extendedprice#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(41) Scan parquet +(41) Sort +Input [3]: [l_partkey#X, l_quantity#X, l_extendedprice#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(42) Scan parquet Output [3]: [p_partkey#X, p_brand#X, p_container#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_brand), IsNotNull(p_container), EqualTo(p_brand,Brand#X), EqualTo(p_container,MED BOX), IsNotNull(p_partkey)] ReadSchema: struct -(42) Filter +(43) Filter Input [3]: [p_partkey#X, p_brand#X, p_container#X] Condition : ((((isnotnull(p_brand#X) AND isnotnull(p_container#X)) AND (p_brand#X = Brand#X)) AND (p_container#X = MED BOX)) AND isnotnull(p_partkey#X)) -(43) Project +(44) Project Output [1]: [p_partkey#X] Input [3]: [p_partkey#X, p_brand#X, p_container#X] -(44) Exchange +(45) Exchange Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(45) ShuffledHashJoin +(46) Sort +Input [1]: [p_partkey#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(47) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(46) Project +(48) ShuffledHashJoinExecTemp +Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, p_partkey#X] +Arguments: BuildRight + +(49) Project Output [3]: [l_quantity#X, l_extendedprice#X, p_partkey#X] Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, p_partkey#X] -(47) Scan parquet +(50) Sort +Input [3]: [l_quantity#X, l_extendedprice#X, p_partkey#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(51) Scan parquet Output [2]: [l_partkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_partkey)] ReadSchema: struct -(48) Filter +(52) Filter Input [2]: [l_partkey#X, l_quantity#X] Condition : isnotnull(l_partkey#X) -(49) HashAggregate +(53) HashAggregate Input [2]: [l_partkey#X, l_quantity#X] Keys [1]: [l_partkey#X] Functions [1]: [partial_avg(l_quantity#X)] Aggregate Attributes [2]: [sum#X, count#X] Results [3]: [l_partkey#X, sum#X, count#X] -(50) Exchange +(54) Exchange Input [3]: [l_partkey#X, sum#X, count#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(51) HashAggregate +(55) HashAggregate Input [3]: [l_partkey#X, sum#X, count#X] Keys [1]: [l_partkey#X] Functions [1]: [avg(l_quantity#X)] Aggregate Attributes [1]: [avg(l_quantity#X)#X] Results [2]: [CheckOverflow((0.200000 * promote_precision(avg(l_quantity#X)#X)), DecimalType(18,7), true) AS (0.2 * avg(l_quantity))#X, l_partkey#X] -(52) Filter +(56) Filter Input [2]: [(0.2 * avg(l_quantity))#X, l_partkey#X] Condition : isnotnull((0.2 * avg(l_quantity))#X) -(53) ShuffledHashJoin +(57) Sort +Input [2]: [(0.2 * avg(l_quantity))#X, l_partkey#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(58) SortMergeJoin Left keys [1]: [p_partkey#X] Right keys [1]: [l_partkey#X] Join condition: (cast(l_quantity#X as decimal(18,7)) < (0.2 * avg(l_quantity))#X) -(54) Project +(59) ShuffledHashJoinExecTemp +Input [5]: [l_quantity#X, l_extendedprice#X, p_partkey#X, (0.2 * avg(l_quantity))#X, l_partkey#X] +Arguments: BuildRight + +(60) Project Output [1]: [l_extendedprice#X] Input [5]: [l_quantity#X, l_extendedprice#X, p_partkey#X, (0.2 * avg(l_quantity))#X, l_partkey#X] -(55) HashAggregate +(61) HashAggregate Input [1]: [l_extendedprice#X] Keys: [] Functions [1]: [partial_sum(l_extendedprice#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [2]: [sum#X, isEmpty#X] -(56) HashAggregate +(62) HashAggregate Input [2]: [sum#X, isEmpty#X] Keys: [] Functions [1]: [sum(l_extendedprice#X)] Aggregate Attributes [1]: [sum(l_extendedprice#X)#X] Results [1]: [CheckOverflow((promote_precision(sum(l_extendedprice#X)#X) / 7.00), DecimalType(27,6), true) AS avg_yearly#X] -(57) AdaptiveSparkPlan +(63) AdaptiveSparkPlan Output [1]: [avg_yearly#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/18.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/18.txt index 428186218984..5f8c741d6fb9 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/18.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/18.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (97) +AdaptiveSparkPlan (109) +- == Final Plan == VeloxColumnarToRowExec (64) +- TakeOrderedAndProjectExecTransformer (63) @@ -52,38 +52,50 @@ AdaptiveSparkPlan (97) +- ShuffleQueryStage (51) +- ReusedExchange (50) +- == Initial Plan == - TakeOrderedAndProject (96) - +- HashAggregate (95) - +- HashAggregate (94) - +- Project (93) - +- ShuffledHashJoin Inner BuildRight (92) - :- Exchange (81) - : +- Project (80) - : +- ShuffledHashJoin Inner BuildLeft (79) - : :- Exchange (67) - : : +- Filter (66) - : : +- Scan parquet (65) - : +- Exchange (78) - : +- ShuffledHashJoin LeftSemi BuildRight (77) - : :- Exchange (70) - : : +- Filter (69) - : : +- Scan parquet (68) - : +- Project (76) - : +- Filter (75) - : +- HashAggregate (74) - : +- Exchange (73) - : +- HashAggregate (72) - : +- Scan parquet (71) - +- ShuffledHashJoin LeftSemi BuildRight (91) - :- Exchange (84) - : +- Filter (83) - : +- Scan parquet (82) - +- Project (90) - +- Filter (89) - +- HashAggregate (88) - +- Exchange (87) - +- HashAggregate (86) - +- Scan parquet (85) + TakeOrderedAndProject (108) + +- HashAggregate (107) + +- HashAggregate (106) + +- Project (105) + +- ShuffledHashJoinExecTemp (104) + +- SortMergeJoin Inner (103) + :- Sort (88) + : +- Exchange (87) + : +- Project (86) + : +- ShuffledHashJoinExecTemp (85) + : +- SortMergeJoin Inner (84) + : :- Sort (68) + : : +- Exchange (67) + : : +- Filter (66) + : : +- Scan parquet (65) + : +- Sort (83) + : +- Exchange (82) + : +- ShuffledHashJoinExecTemp (81) + : +- SortMergeJoin LeftSemi (80) + : :- Sort (72) + : : +- Exchange (71) + : : +- Filter (70) + : : +- Scan parquet (69) + : +- Sort (79) + : +- Project (78) + : +- Filter (77) + : +- HashAggregate (76) + : +- Exchange (75) + : +- HashAggregate (74) + : +- Scan parquet (73) + +- Sort (102) + +- ShuffledHashJoinExecTemp (101) + +- SortMergeJoin LeftSemi (100) + :- Sort (92) + : +- Exchange (91) + : +- Filter (90) + : +- Scan parquet (89) + +- Sort (99) + +- Project (98) + +- Filter (97) + +- HashAggregate (96) + +- Exchange (95) + +- HashAggregate (94) + +- Scan parquet (93) (1) Scan parquet @@ -371,154 +383,202 @@ Condition : isnotnull(c_custkey#X) Input [2]: [c_custkey#X, c_name#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(68) Scan parquet +(68) Sort +Input [2]: [c_custkey#X, c_name#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(69) Scan parquet Output [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_custkey), IsNotNull(o_orderkey)] ReadSchema: struct -(69) Filter +(70) Filter Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Condition : (isnotnull(o_custkey#X) AND isnotnull(o_orderkey#X)) -(70) Exchange +(71) Exchange Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(71) Scan parquet +(72) Sort +Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(73) Scan parquet Output [2]: [l_orderkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(72) HashAggregate +(74) HashAggregate Input [2]: [l_orderkey#X, l_quantity#X] Keys [1]: [l_orderkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_orderkey#X, sum#X, isEmpty#X] -(73) Exchange +(75) Exchange Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(74) HashAggregate +(76) HashAggregate Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Keys [1]: [l_orderkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [2]: [l_orderkey#X, sum(l_quantity#X)#X AS sum(l_quantity#X)#X] -(75) Filter +(77) Filter Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] Condition : (isnotnull(sum(l_quantity#X)#X) AND (sum(l_quantity#X)#X > 300.00)) -(76) Project +(78) Project Output [1]: [l_orderkey#X] Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] -(77) ShuffledHashJoin +(79) Sort +Input [1]: [l_orderkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(80) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(78) Exchange +(81) ShuffledHashJoinExecTemp +Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] +Arguments: BuildRight + +(82) Exchange Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(79) ShuffledHashJoin +(83) Sort +Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(84) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join condition: None -(80) Project +(85) ShuffledHashJoinExecTemp +Input [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] +Arguments: BuildLeft + +(86) Project Output [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] Input [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] -(81) Exchange +(87) Exchange Input [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(82) Scan parquet +(88) Sort +Input [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(89) Scan parquet Output [2]: [l_orderkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_orderkey)] ReadSchema: struct -(83) Filter +(90) Filter Input [2]: [l_orderkey#X, l_quantity#X] Condition : isnotnull(l_orderkey#X) -(84) Exchange +(91) Exchange Input [2]: [l_orderkey#X, l_quantity#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(85) Scan parquet +(92) Sort +Input [2]: [l_orderkey#X, l_quantity#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(93) Scan parquet Output [2]: [l_orderkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(86) HashAggregate +(94) HashAggregate Input [2]: [l_orderkey#X, l_quantity#X] Keys [1]: [l_orderkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_orderkey#X, sum#X, isEmpty#X] -(87) Exchange +(95) Exchange Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(88) HashAggregate +(96) HashAggregate Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Keys [1]: [l_orderkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [2]: [l_orderkey#X, sum(l_quantity#X)#X AS sum(l_quantity#X)#X] -(89) Filter +(97) Filter Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] Condition : (isnotnull(sum(l_quantity#X)#X) AND (sum(l_quantity#X)#X > 300.00)) -(90) Project +(98) Project Output [1]: [l_orderkey#X] Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] -(91) ShuffledHashJoin +(99) Sort +Input [1]: [l_orderkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(100) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(92) ShuffledHashJoin +(101) ShuffledHashJoinExecTemp +Input [2]: [l_orderkey#X, l_quantity#X] +Arguments: BuildRight + +(102) Sort +Input [2]: [l_orderkey#X, l_quantity#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(103) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(93) Project +(104) ShuffledHashJoinExecTemp +Input [7]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_orderkey#X, l_quantity#X] +Arguments: BuildRight + +(105) Project Output [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_quantity#X] Input [7]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_orderkey#X, l_quantity#X] -(94) HashAggregate +(106) HashAggregate Input [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_quantity#X] Keys [5]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [7]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] -(95) HashAggregate +(107) HashAggregate Input [7]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] Keys [5]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity#X)#X AS sum(l_quantity)#X] -(96) TakeOrderedAndProject +(108) TakeOrderedAndProject Input [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] Arguments: X, [o_totalprice#X DESC NULLS LAST, o_orderdate#X ASC NULLS FIRST], [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] -(97) AdaptiveSparkPlan +(109) AdaptiveSparkPlan Output [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/19.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/19.txt index 4ea9d09e13f7..b6cbddd8e8b7 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/19.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/19.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (34) +AdaptiveSparkPlan (37) +- == Final Plan == VeloxColumnarToRowExec (22) +- ^ RegularHashAggregateExecTransformer (20) @@ -19,17 +19,20 @@ AdaptiveSparkPlan (34) +- ^ FilterExecTransformer (10) +- ^ Scan parquet (9) +- == Initial Plan == - HashAggregate (33) - +- HashAggregate (32) - +- Project (31) - +- ShuffledHashJoin Inner BuildRight (30) - :- Exchange (26) - : +- Project (25) - : +- Filter (24) - : +- Scan parquet (23) - +- Exchange (29) - +- Filter (28) - +- Scan parquet (27) + HashAggregate (36) + +- HashAggregate (35) + +- Project (34) + +- ShuffledHashJoinExecTemp (33) + +- SortMergeJoin Inner (32) + :- Sort (27) + : +- Exchange (26) + : +- Project (25) + : +- Filter (24) + : +- Scan parquet (23) + +- Sort (31) + +- Exchange (30) + +- Filter (29) + +- Scan parquet (28) (1) Scan parquet @@ -147,44 +150,56 @@ Input [6]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, l_shipin Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(27) Scan parquet +(27) Sort +Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(28) Scan parquet Output [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_size), GreaterThanOrEqual(p_size,1), IsNotNull(p_partkey), Or(Or(And(And(EqualTo(p_brand,Brand#X),In(p_container, [SM BOX,SM CASE,SM PACK,SM PKG])),LessThanOrEqual(p_size,5)),And(And(EqualTo(p_brand,Brand#X),In(p_container, [MED BAG,MED BOX,MED PACK,MED PKG])),LessThanOrEqual(p_size,10))),And(And(EqualTo(p_brand,Brand#X),In(p_container, [LG BOX,LG CASE,LG PACK,LG PKG])),LessThanOrEqual(p_size,15)))] ReadSchema: struct -(28) Filter +(29) Filter Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Condition : (((isnotnull(p_size#X) AND (p_size#X >= 1)) AND isnotnull(p_partkey#X)) AND (((((p_brand#X = Brand#X) AND p_container#X IN (SM CASE,SM BOX,SM PACK,SM PKG)) AND (p_size#X <= 5)) OR (((p_brand#X = Brand#X) AND p_container#X IN (MED BAG,MED BOX,MED PKG,MED PACK)) AND (p_size#X <= 10))) OR (((p_brand#X = Brand#X) AND p_container#X IN (LG CASE,LG BOX,LG PACK,LG PKG)) AND (p_size#X <= 15)))) -(29) Exchange +(30) Exchange Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(30) ShuffledHashJoin +(31) Sort +Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(32) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join condition: (((((((p_brand#X = Brand#X) AND p_container#X IN (SM CASE,SM BOX,SM PACK,SM PKG)) AND (l_quantity#X >= 1.00)) AND (l_quantity#X <= 11.00)) AND (p_size#X <= 5)) OR (((((p_brand#X = Brand#X) AND p_container#X IN (MED BAG,MED BOX,MED PKG,MED PACK)) AND (l_quantity#X >= 10.00)) AND (l_quantity#X <= 20.00)) AND (p_size#X <= 10))) OR (((((p_brand#X = Brand#X) AND p_container#X IN (LG CASE,LG BOX,LG PACK,LG PKG)) AND (l_quantity#X >= 20.00)) AND (l_quantity#X <= 30.00)) AND (p_size#X <= 15))) -(31) Project +(33) ShuffledHashJoinExecTemp +Input [8]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, p_partkey#X, p_brand#X, p_size#X, p_container#X] +Arguments: BuildRight + +(34) Project Output [2]: [l_extendedprice#X, l_discount#X] Input [8]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, p_partkey#X, p_brand#X, p_size#X, p_container#X] -(32) HashAggregate +(35) HashAggregate Input [2]: [l_extendedprice#X, l_discount#X] Keys: [] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [2]: [sum#X, isEmpty#X] -(33) HashAggregate +(36) HashAggregate Input [2]: [sum#X, isEmpty#X] Keys: [] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X] Results [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X AS revenue#X] -(34) AdaptiveSparkPlan +(37) AdaptiveSparkPlan Output [1]: [revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/20.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/20.txt index 4fe956080086..95e2ea88800c 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/20.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/20.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (126) +AdaptiveSparkPlan (141) +- == Final Plan == VeloxColumnarToRowExec (86) +- ^ SortExecTransformer (84) @@ -66,45 +66,60 @@ AdaptiveSparkPlan (126) +- ^ FilterExecTransformer (70) +- ^ Scan parquet (69) +- == Initial Plan == - Sort (125) - +- Exchange (124) - +- Project (123) - +- ShuffledHashJoin Inner BuildRight (122) - :- Exchange (117) - : +- Project (116) - : +- ShuffledHashJoin LeftSemi BuildRight (115) - : :- Exchange (89) - : : +- Filter (88) - : : +- Scan parquet (87) - : +- Exchange (114) - : +- Project (113) - : +- ShuffledHashJoin Inner BuildLeft (112) - : :- Exchange (98) - : : +- ShuffledHashJoin LeftSemi BuildRight (97) - : : :- Exchange (92) - : : : +- Filter (91) - : : : +- Scan parquet (90) - : : +- Exchange (96) - : : +- Project (95) - : : +- Filter (94) - : : +- Scan parquet (93) - : +- Exchange (111) - : +- Filter (110) - : +- HashAggregate (109) - : +- HashAggregate (108) - : +- ShuffledHashJoin LeftSemi BuildRight (107) - : :- Exchange (102) - : : +- Project (101) - : : +- Filter (100) - : : +- Scan parquet (99) - : +- Exchange (106) - : +- Project (105) - : +- Filter (104) - : +- Scan parquet (103) - +- Exchange (121) - +- Project (120) - +- Filter (119) - +- Scan parquet (118) + Sort (140) + +- Exchange (139) + +- Project (138) + +- ShuffledHashJoinExecTemp (137) + +- SortMergeJoin Inner (136) + :- Sort (130) + : +- Exchange (129) + : +- Project (128) + : +- ShuffledHashJoinExecTemp (127) + : +- SortMergeJoin LeftSemi (126) + : :- Sort (90) + : : +- Exchange (89) + : : +- Filter (88) + : : +- Scan parquet (87) + : +- Sort (125) + : +- Exchange (124) + : +- Project (123) + : +- ShuffledHashJoinExecTemp (122) + : +- SortMergeJoin Inner (121) + : :- Sort (103) + : : +- Exchange (102) + : : +- ShuffledHashJoinExecTemp (101) + : : +- SortMergeJoin LeftSemi (100) + : : :- Sort (94) + : : : +- Exchange (93) + : : : +- Filter (92) + : : : +- Scan parquet (91) + : : +- Sort (99) + : : +- Exchange (98) + : : +- Project (97) + : : +- Filter (96) + : : +- Scan parquet (95) + : +- Sort (120) + : +- Exchange (119) + : +- Filter (118) + : +- HashAggregate (117) + : +- HashAggregate (116) + : +- ShuffledHashJoinExecTemp (115) + : +- SortMergeJoin LeftSemi (114) + : :- Sort (108) + : : +- Exchange (107) + : : +- Project (106) + : : +- Filter (105) + : : +- Scan parquet (104) + : +- Sort (113) + : +- Exchange (112) + : +- Project (111) + : +- Filter (110) + : +- Scan parquet (109) + +- Sort (135) + +- Exchange (134) + +- Project (133) + +- Filter (132) + +- Scan parquet (131) (1) Scan parquet @@ -468,176 +483,236 @@ Condition : isnotnull(s_nationkey#X) Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(90) Scan parquet +(90) Sort +Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(91) Scan parquet Output [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(ps_availqty), IsNotNull(ps_partkey), IsNotNull(ps_suppkey)] ReadSchema: struct -(91) Filter +(92) Filter Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Condition : ((isnotnull(ps_availqty#X) AND isnotnull(ps_partkey#X)) AND isnotnull(ps_suppkey#X)) -(92) Exchange +(93) Exchange Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Arguments: hashpartitioning(ps_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(93) Scan parquet +(94) Sort +Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] +Arguments: [ps_partkey#X ASC NULLS FIRST], false, 0 + +(95) Scan parquet Output [2]: [p_partkey#X, p_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_name), StringStartsWith(p_name,forest)] ReadSchema: struct -(94) Filter +(96) Filter Input [2]: [p_partkey#X, p_name#X] Condition : (isnotnull(p_name#X) AND StartsWith(p_name#X, forest)) -(95) Project +(97) Project Output [1]: [p_partkey#X] Input [2]: [p_partkey#X, p_name#X] -(96) Exchange +(98) Exchange Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(97) ShuffledHashJoin +(99) Sort +Input [1]: [p_partkey#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(100) SortMergeJoin Left keys [1]: [ps_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(98) Exchange +(101) ShuffledHashJoinExecTemp +Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] +Arguments: BuildRight + +(102) Exchange Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Arguments: hashpartitioning(ps_partkey#X, ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(99) Scan parquet +(103) Sort +Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] +Arguments: [ps_partkey#X ASC NULLS FIRST, ps_suppkey#X ASC NULLS FIRST], false, 0 + +(104) Scan parquet Output [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1994-01-01), LessThan(l_shipdate,1995-01-01), IsNotNull(l_partkey), IsNotNull(l_suppkey)] ReadSchema: struct -(100) Filter +(105) Filter Input [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] Condition : ((((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1994-01-01)) AND (l_shipdate#X < 1995-01-01)) AND isnotnull(l_partkey#X)) AND isnotnull(l_suppkey#X)) -(101) Project +(106) Project Output [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Input [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] -(102) Exchange +(107) Exchange Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(103) Scan parquet +(108) Sort +Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(109) Scan parquet Output [2]: [p_partkey#X, p_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_name), StringStartsWith(p_name,forest)] ReadSchema: struct -(104) Filter +(110) Filter Input [2]: [p_partkey#X, p_name#X] Condition : (isnotnull(p_name#X) AND StartsWith(p_name#X, forest)) -(105) Project +(111) Project Output [1]: [p_partkey#X] Input [2]: [p_partkey#X, p_name#X] -(106) Exchange +(112) Exchange Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(107) ShuffledHashJoin +(113) Sort +Input [1]: [p_partkey#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(114) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(108) HashAggregate +(115) ShuffledHashJoinExecTemp +Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] +Arguments: BuildRight + +(116) HashAggregate Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Keys [2]: [l_partkey#X, l_suppkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] -(109) HashAggregate +(117) HashAggregate Input [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] Keys [2]: [l_partkey#X, l_suppkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [3]: [CheckOverflow((0.50 * promote_precision(sum(l_quantity#X)#X)), DecimalType(24,3), true) AS (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(110) Filter +(118) Filter Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Condition : isnotnull((0.5 * sum(l_quantity))#X) -(111) Exchange +(119) Exchange Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Arguments: hashpartitioning(l_partkey#X, l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(112) ShuffledHashJoin +(120) Sort +Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] +Arguments: [l_partkey#X ASC NULLS FIRST, l_suppkey#X ASC NULLS FIRST], false, 0 + +(121) SortMergeJoin Left keys [2]: [ps_partkey#X, ps_suppkey#X] Right keys [2]: [l_partkey#X, l_suppkey#X] Join condition: (cast(cast(ps_availqty#X as decimal(10,0)) as decimal(24,3)) > (0.5 * sum(l_quantity))#X) -(113) Project +(122) ShuffledHashJoinExecTemp +Input [6]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] +Arguments: BuildLeft + +(123) Project Output [1]: [ps_suppkey#X] Input [6]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(114) Exchange +(124) Exchange Input [1]: [ps_suppkey#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(115) ShuffledHashJoin +(125) Sort +Input [1]: [ps_suppkey#X] +Arguments: [ps_suppkey#X ASC NULLS FIRST], false, 0 + +(126) SortMergeJoin Left keys [1]: [s_suppkey#X] Right keys [1]: [ps_suppkey#X] Join condition: None -(116) Project +(127) ShuffledHashJoinExecTemp +Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] +Arguments: BuildRight + +(128) Project Output [3]: [s_name#X, s_address#X, s_nationkey#X] Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] -(117) Exchange +(129) Exchange Input [3]: [s_name#X, s_address#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(118) Scan parquet +(130) Sort +Input [3]: [s_name#X, s_address#X, s_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(131) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,CANADA), IsNotNull(n_nationkey)] ReadSchema: struct -(119) Filter +(132) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = CANADA)) AND isnotnull(n_nationkey#X)) -(120) Project +(133) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(121) Exchange +(134) Exchange Input [1]: [n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(122) ShuffledHashJoin +(135) Sort +Input [1]: [n_nationkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(136) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(123) Project +(137) ShuffledHashJoinExecTemp +Input [4]: [s_name#X, s_address#X, s_nationkey#X, n_nationkey#X] +Arguments: BuildRight + +(138) Project Output [2]: [s_name#X, s_address#X] Input [4]: [s_name#X, s_address#X, s_nationkey#X, n_nationkey#X] -(124) Exchange +(139) Exchange Input [2]: [s_name#X, s_address#X] Arguments: rangepartitioning(s_name#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(125) Sort +(140) Sort Input [2]: [s_name#X, s_address#X] Arguments: [s_name#X ASC NULLS FIRST], true, 0 -(126) AdaptiveSparkPlan +(141) AdaptiveSparkPlan Output [2]: [s_name#X, s_address#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/21.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/21.txt index 6f94bd5c8d01..0e24bcc866b9 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/21.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/21.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (119) +AdaptiveSparkPlan (134) +- == Final Plan == VeloxColumnarToRowExec (82) +- TakeOrderedAndProjectExecTransformer (81) @@ -63,42 +63,57 @@ AdaptiveSparkPlan (119) +- ^ FilterExecTransformer (63) +- ^ Scan parquet (62) +- == Initial Plan == - TakeOrderedAndProject (118) - +- HashAggregate (117) - +- Exchange (116) - +- HashAggregate (115) - +- Project (114) - +- ShuffledHashJoin Inner BuildRight (113) - :- Exchange (108) - : +- Project (107) - : +- ShuffledHashJoin Inner BuildRight (106) - : :- Exchange (101) - : : +- Project (100) - : : +- ShuffledHashJoin Inner BuildLeft (99) - : : :- Exchange (85) - : : : +- Filter (84) - : : : +- Scan parquet (83) - : : +- Exchange (98) - : : +- ShuffledHashJoin LeftAnti BuildRight (97) - : : :- ShuffledHashJoin LeftSemi BuildRight (92) - : : : :- Exchange (89) - : : : : +- Project (88) - : : : : +- Filter (87) - : : : : +- Scan parquet (86) - : : : +- Exchange (91) - : : : +- Scan parquet (90) - : : +- Exchange (96) - : : +- Project (95) - : : +- Filter (94) - : : +- Scan parquet (93) - : +- Exchange (105) - : +- Project (104) - : +- Filter (103) - : +- Scan parquet (102) - +- Exchange (112) - +- Project (111) - +- Filter (110) - +- Scan parquet (109) + TakeOrderedAndProject (133) + +- HashAggregate (132) + +- Exchange (131) + +- HashAggregate (130) + +- Project (129) + +- ShuffledHashJoinExecTemp (128) + +- SortMergeJoin Inner (127) + :- Sort (121) + : +- Exchange (120) + : +- Project (119) + : +- ShuffledHashJoinExecTemp (118) + : +- SortMergeJoin Inner (117) + : :- Sort (111) + : : +- Exchange (110) + : : +- Project (109) + : : +- ShuffledHashJoinExecTemp (108) + : : +- SortMergeJoin Inner (107) + : : :- Sort (86) + : : : +- Exchange (85) + : : : +- Filter (84) + : : : +- Scan parquet (83) + : : +- Sort (106) + : : +- Exchange (105) + : : +- ShuffledHashJoinExecTemp (104) + : : +- SortMergeJoin LeftAnti (103) + : : :- Sort (97) + : : : +- ShuffledHashJoinExecTemp (96) + : : : +- SortMergeJoin LeftSemi (95) + : : : :- Sort (91) + : : : : +- Exchange (90) + : : : : +- Project (89) + : : : : +- Filter (88) + : : : : +- Scan parquet (87) + : : : +- Sort (94) + : : : +- Exchange (93) + : : : +- Scan parquet (92) + : : +- Sort (102) + : : +- Exchange (101) + : : +- Project (100) + : : +- Filter (99) + : : +- Scan parquet (98) + : +- Sort (116) + : +- Exchange (115) + : +- Project (114) + : +- Filter (113) + : +- Scan parquet (112) + +- Sort (126) + +- Exchange (125) + +- Project (124) + +- Filter (123) + +- Scan parquet (122) (1) Scan parquet @@ -451,163 +466,223 @@ Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) Input [3]: [s_suppkey#X, s_name#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(86) Scan parquet +(86) Sort +Input [3]: [s_suppkey#X, s_name#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(87) Scan parquet Output [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_receiptdate), IsNotNull(l_commitdate), IsNotNull(l_suppkey), IsNotNull(l_orderkey)] ReadSchema: struct -(87) Filter +(88) Filter Input [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] Condition : ((((isnotnull(l_receiptdate#X) AND isnotnull(l_commitdate#X)) AND (l_receiptdate#X > l_commitdate#X)) AND isnotnull(l_suppkey#X)) AND isnotnull(l_orderkey#X)) -(88) Project +(89) Project Output [2]: [l_orderkey#X, l_suppkey#X] Input [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] -(89) Exchange +(90) Exchange Input [2]: [l_orderkey#X, l_suppkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(90) Scan parquet +(91) Sort +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(92) Scan parquet Output [2]: [l_orderkey#X, l_suppkey#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(91) Exchange +(93) Exchange Input [2]: [l_orderkey#X, l_suppkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(92) ShuffledHashJoin +(94) Sort +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(95) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: NOT (l_suppkey#X = l_suppkey#X) -(93) Scan parquet +(96) ShuffledHashJoinExecTemp +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: BuildRight + +(97) Sort +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(98) Scan parquet Output [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_receiptdate), IsNotNull(l_commitdate)] ReadSchema: struct -(94) Filter +(99) Filter Input [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] Condition : ((isnotnull(l_receiptdate#X) AND isnotnull(l_commitdate#X)) AND (l_receiptdate#X > l_commitdate#X)) -(95) Project +(100) Project Output [2]: [l_orderkey#X, l_suppkey#X] Input [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] -(96) Exchange +(101) Exchange Input [2]: [l_orderkey#X, l_suppkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(97) ShuffledHashJoin +(102) Sort +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(103) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: NOT (l_suppkey#X = l_suppkey#X) -(98) Exchange +(104) ShuffledHashJoinExecTemp +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: BuildRight + +(105) Exchange Input [2]: [l_orderkey#X, l_suppkey#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(99) ShuffledHashJoin +(106) Sort +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: [l_suppkey#X ASC NULLS FIRST], false, 0 + +(107) SortMergeJoin Left keys [1]: [s_suppkey#X] Right keys [1]: [l_suppkey#X] Join condition: None -(100) Project +(108) ShuffledHashJoinExecTemp +Input [5]: [s_suppkey#X, s_name#X, s_nationkey#X, l_orderkey#X, l_suppkey#X] +Arguments: BuildLeft + +(109) Project Output [3]: [s_name#X, s_nationkey#X, l_orderkey#X] Input [5]: [s_suppkey#X, s_name#X, s_nationkey#X, l_orderkey#X, l_suppkey#X] -(101) Exchange +(110) Exchange Input [3]: [s_name#X, s_nationkey#X, l_orderkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(102) Scan parquet +(111) Sort +Input [3]: [s_name#X, s_nationkey#X, l_orderkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(112) Scan parquet Output [2]: [o_orderkey#X, o_orderstatus#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderstatus), EqualTo(o_orderstatus,F), IsNotNull(o_orderkey)] ReadSchema: struct -(103) Filter +(113) Filter Input [2]: [o_orderkey#X, o_orderstatus#X] Condition : ((isnotnull(o_orderstatus#X) AND (o_orderstatus#X = F)) AND isnotnull(o_orderkey#X)) -(104) Project +(114) Project Output [1]: [o_orderkey#X] Input [2]: [o_orderkey#X, o_orderstatus#X] -(105) Exchange +(115) Exchange Input [1]: [o_orderkey#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(106) ShuffledHashJoin +(116) Sort +Input [1]: [o_orderkey#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(117) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [o_orderkey#X] Join condition: None -(107) Project +(118) ShuffledHashJoinExecTemp +Input [4]: [s_name#X, s_nationkey#X, l_orderkey#X, o_orderkey#X] +Arguments: BuildRight + +(119) Project Output [2]: [s_name#X, s_nationkey#X] Input [4]: [s_name#X, s_nationkey#X, l_orderkey#X, o_orderkey#X] -(108) Exchange +(120) Exchange Input [2]: [s_name#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(109) Scan parquet +(121) Sort +Input [2]: [s_name#X, s_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(122) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,SAUDI ARABIA), IsNotNull(n_nationkey)] ReadSchema: struct -(110) Filter +(123) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = SAUDI ARABIA)) AND isnotnull(n_nationkey#X)) -(111) Project +(124) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(112) Exchange +(125) Exchange Input [1]: [n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(113) ShuffledHashJoin +(126) Sort +Input [1]: [n_nationkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(127) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(114) Project +(128) ShuffledHashJoinExecTemp +Input [3]: [s_name#X, s_nationkey#X, n_nationkey#X] +Arguments: BuildRight + +(129) Project Output [1]: [s_name#X] Input [3]: [s_name#X, s_nationkey#X, n_nationkey#X] -(115) HashAggregate +(130) HashAggregate Input [1]: [s_name#X] Keys [1]: [s_name#X] Functions [1]: [partial_count(1)] Aggregate Attributes [1]: [count#X] Results [2]: [s_name#X, count#X] -(116) Exchange +(131) Exchange Input [2]: [s_name#X, count#X] Arguments: hashpartitioning(s_name#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(117) HashAggregate +(132) HashAggregate Input [2]: [s_name#X, count#X] Keys [1]: [s_name#X] Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#X] Results [2]: [s_name#X, count(1)#X AS numwait#X] -(118) TakeOrderedAndProject +(133) TakeOrderedAndProject Input [2]: [s_name#X, numwait#X] Arguments: X, [numwait#X DESC NULLS LAST, s_name#X ASC NULLS FIRST], [s_name#X, numwait#X] -(119) AdaptiveSparkPlan +(134) AdaptiveSparkPlan Output [2]: [s_name#X, numwait#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/22.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/22.txt index 77c3b584125b..d1915566b160 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/22.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/22.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (46) +AdaptiveSparkPlan (49) +- == Final Plan == VeloxColumnarToRowExec (33) +- ^ SortExecTransformer (31) @@ -26,18 +26,21 @@ AdaptiveSparkPlan (46) +- ^ ProjectExecTransformer (10) +- ^ Scan parquet (9) +- == Initial Plan == - Sort (45) - +- Exchange (44) - +- HashAggregate (43) - +- Exchange (42) - +- HashAggregate (41) - +- Project (40) - +- ShuffledHashJoin LeftAnti BuildRight (39) - :- Exchange (36) - : +- Filter (35) - : +- Scan parquet (34) - +- Exchange (38) - +- Scan parquet (37) + Sort (48) + +- Exchange (47) + +- HashAggregate (46) + +- Exchange (45) + +- HashAggregate (44) + +- Project (43) + +- ShuffledHashJoinExecTemp (42) + +- SortMergeJoin LeftAnti (41) + :- Sort (37) + : +- Exchange (36) + : +- Filter (35) + : +- Scan parquet (34) + +- Sort (40) + +- Exchange (39) + +- Scan parquet (38) (1) Scan parquet @@ -190,51 +193,63 @@ Condition : ((isnotnull(c_acctbal#X) AND substring(c_phone#X, 1, 2) IN (13,31,23 Input [3]: [c_custkey#X, c_phone#X, c_acctbal#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(37) Scan parquet +(37) Sort +Input [3]: [c_custkey#X, c_phone#X, c_acctbal#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(38) Scan parquet Output [1]: [o_custkey#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(38) Exchange +(39) Exchange Input [1]: [o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(39) ShuffledHashJoin +(40) Sort +Input [1]: [o_custkey#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(41) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join condition: None -(40) Project +(42) ShuffledHashJoinExecTemp +Input [3]: [c_custkey#X, c_phone#X, c_acctbal#X] +Arguments: BuildRight + +(43) Project Output [2]: [substring(c_phone#X, 1, 2) AS cntrycode#X, c_acctbal#X] Input [3]: [c_custkey#X, c_phone#X, c_acctbal#X] -(41) HashAggregate +(44) HashAggregate Input [2]: [cntrycode#X, c_acctbal#X] Keys [1]: [cntrycode#X] Functions [2]: [partial_count(1), partial_sum(c_acctbal#X)] Aggregate Attributes [3]: [count#X, sum#X, isEmpty#X] Results [4]: [cntrycode#X, count#X, sum#X, isEmpty#X] -(42) Exchange +(45) Exchange Input [4]: [cntrycode#X, count#X, sum#X, isEmpty#X] Arguments: hashpartitioning(cntrycode#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(43) HashAggregate +(46) HashAggregate Input [4]: [cntrycode#X, count#X, sum#X, isEmpty#X] Keys [1]: [cntrycode#X] Functions [2]: [count(1), sum(c_acctbal#X)] Aggregate Attributes [2]: [count(1)#X, sum(c_acctbal#X)#X] Results [3]: [cntrycode#X, count(1)#X AS numcust#X, sum(c_acctbal#X)#X AS totacctbal#X] -(44) Exchange +(47) Exchange Input [3]: [cntrycode#X, numcust#X, totacctbal#X] Arguments: rangepartitioning(cntrycode#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(45) Sort +(48) Sort Input [3]: [cntrycode#X, numcust#X, totacctbal#X] Arguments: [cntrycode#X ASC NULLS FIRST], true, 0 -(46) AdaptiveSparkPlan +(49) AdaptiveSparkPlan Output [3]: [cntrycode#X, numcust#X, totacctbal#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/3.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/3.txt index 6e4f9178de19..9ad3c25ad8ba 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/3.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/3.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (59) +AdaptiveSparkPlan (65) +- == Final Plan == VeloxColumnarToRowExec (39) +- TakeOrderedAndProjectExecTransformer (38) @@ -32,25 +32,31 @@ AdaptiveSparkPlan (59) +- ^ FilterExecTransformer (25) +- ^ Scan parquet (24) +- == Initial Plan == - TakeOrderedAndProject (58) - +- HashAggregate (57) - +- HashAggregate (56) - +- Project (55) - +- ShuffledHashJoin Inner BuildRight (54) - :- Exchange (49) - : +- Project (48) - : +- ShuffledHashJoin Inner BuildLeft (47) - : :- Exchange (43) - : : +- Project (42) - : : +- Filter (41) - : : +- Scan parquet (40) - : +- Exchange (46) - : +- Filter (45) - : +- Scan parquet (44) - +- Exchange (53) - +- Project (52) - +- Filter (51) - +- Scan parquet (50) + TakeOrderedAndProject (64) + +- HashAggregate (63) + +- HashAggregate (62) + +- Project (61) + +- ShuffledHashJoinExecTemp (60) + +- SortMergeJoin Inner (59) + :- Sort (53) + : +- Exchange (52) + : +- Project (51) + : +- ShuffledHashJoinExecTemp (50) + : +- SortMergeJoin Inner (49) + : :- Sort (44) + : : +- Exchange (43) + : : +- Project (42) + : : +- Filter (41) + : : +- Scan parquet (40) + : +- Sort (48) + : +- Exchange (47) + : +- Filter (46) + : +- Scan parquet (45) + +- Sort (58) + +- Exchange (57) + +- Project (56) + +- Filter (55) + +- Scan parquet (54) (1) Scan parquet @@ -236,80 +242,104 @@ Input [2]: [c_custkey#X, c_mktsegment#X] Input [1]: [c_custkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(44) Scan parquet +(44) Sort +Input [1]: [c_custkey#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(45) Scan parquet Output [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderdate), LessThan(o_orderdate,1995-03-15), IsNotNull(o_custkey), IsNotNull(o_orderkey)] ReadSchema: struct -(45) Filter +(46) Filter Input [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Condition : (((isnotnull(o_orderdate#X) AND (o_orderdate#X < 1995-03-15)) AND isnotnull(o_custkey#X)) AND isnotnull(o_orderkey#X)) -(46) Exchange +(47) Exchange Input [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(47) ShuffledHashJoin +(48) Sort +Input [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(49) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join condition: None -(48) Project +(50) ShuffledHashJoinExecTemp +Input [5]: [c_custkey#X, o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] +Arguments: BuildLeft + +(51) Project Output [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] Input [5]: [c_custkey#X, o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] -(49) Exchange +(52) Exchange Input [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(50) Scan parquet +(53) Sort +Input [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(54) Scan parquet Output [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThan(l_shipdate,1995-03-15), IsNotNull(l_orderkey)] ReadSchema: struct -(51) Filter +(55) Filter Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : ((isnotnull(l_shipdate#X) AND (l_shipdate#X > 1995-03-15)) AND isnotnull(l_orderkey#X)) -(52) Project +(56) Project Output [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(53) Exchange +(57) Exchange Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(54) ShuffledHashJoin +(58) Sort +Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(59) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(55) Project +(60) ShuffledHashJoinExecTemp +Input [6]: [o_orderkey#X, o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] +Arguments: BuildRight + +(61) Project Output [5]: [o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] Input [6]: [o_orderkey#X, o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] -(56) HashAggregate +(62) HashAggregate Input [5]: [o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] Keys [3]: [l_orderkey#X, o_orderdate#X, o_shippriority#X] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [5]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] -(57) HashAggregate +(63) HashAggregate Input [5]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] Keys [3]: [l_orderkey#X, o_orderdate#X, o_shippriority#X] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X] Results [4]: [l_orderkey#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X AS revenue#X, o_orderdate#X, o_shippriority#X] -(58) TakeOrderedAndProject +(64) TakeOrderedAndProject Input [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] Arguments: X, [revenue#X DESC NULLS LAST, o_orderdate#X ASC NULLS FIRST], [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] -(59) AdaptiveSparkPlan +(65) AdaptiveSparkPlan Output [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/4.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/4.txt index d2e7a9cffbd6..b3f62ba24714 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/4.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/4.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (50) +AdaptiveSparkPlan (53) +- == Final Plan == VeloxColumnarToRowExec (34) +- ^ SortExecTransformer (32) @@ -27,21 +27,24 @@ AdaptiveSparkPlan (50) +- ^ FilterExecTransformer (10) +- ^ Scan parquet (9) +- == Initial Plan == - Sort (49) - +- Exchange (48) - +- HashAggregate (47) - +- Exchange (46) - +- HashAggregate (45) - +- Project (44) - +- ShuffledHashJoin LeftSemi BuildRight (43) - :- Exchange (38) - : +- Project (37) - : +- Filter (36) - : +- Scan parquet (35) - +- Exchange (42) - +- Project (41) - +- Filter (40) - +- Scan parquet (39) + Sort (52) + +- Exchange (51) + +- HashAggregate (50) + +- Exchange (49) + +- HashAggregate (48) + +- Project (47) + +- ShuffledHashJoinExecTemp (46) + +- SortMergeJoin LeftSemi (45) + :- Sort (39) + : +- Exchange (38) + : +- Project (37) + : +- Filter (36) + : +- Scan parquet (35) + +- Sort (44) + +- Exchange (43) + +- Project (42) + +- Filter (41) + +- Scan parquet (40) (1) Scan parquet @@ -203,60 +206,72 @@ Input [3]: [o_orderkey#X, o_orderdate#X, o_orderpriority#X] Input [2]: [o_orderkey#X, o_orderpriority#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(39) Scan parquet +(39) Sort +Input [2]: [o_orderkey#X, o_orderpriority#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(40) Scan parquet Output [3]: [l_orderkey#X, l_commitdate#X, l_receiptdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_commitdate), IsNotNull(l_receiptdate)] ReadSchema: struct -(40) Filter +(41) Filter Input [3]: [l_orderkey#X, l_commitdate#X, l_receiptdate#X] Condition : ((isnotnull(l_commitdate#X) AND isnotnull(l_receiptdate#X)) AND (l_commitdate#X < l_receiptdate#X)) -(41) Project +(42) Project Output [1]: [l_orderkey#X] Input [3]: [l_orderkey#X, l_commitdate#X, l_receiptdate#X] -(42) Exchange +(43) Exchange Input [1]: [l_orderkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(43) ShuffledHashJoin +(44) Sort +Input [1]: [l_orderkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(45) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(44) Project +(46) ShuffledHashJoinExecTemp +Input [2]: [o_orderkey#X, o_orderpriority#X] +Arguments: BuildRight + +(47) Project Output [1]: [o_orderpriority#X] Input [2]: [o_orderkey#X, o_orderpriority#X] -(45) HashAggregate +(48) HashAggregate Input [1]: [o_orderpriority#X] Keys [1]: [o_orderpriority#X] Functions [1]: [partial_count(1)] Aggregate Attributes [1]: [count#X] Results [2]: [o_orderpriority#X, count#X] -(46) Exchange +(49) Exchange Input [2]: [o_orderpriority#X, count#X] Arguments: hashpartitioning(o_orderpriority#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(47) HashAggregate +(50) HashAggregate Input [2]: [o_orderpriority#X, count#X] Keys [1]: [o_orderpriority#X] Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#X] Results [2]: [o_orderpriority#X, count(1)#X AS order_count#X] -(48) Exchange +(51) Exchange Input [2]: [o_orderpriority#X, order_count#X] Arguments: rangepartitioning(o_orderpriority#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(49) Sort +(52) Sort Input [2]: [o_orderpriority#X, order_count#X] Arguments: [o_orderpriority#X ASC NULLS FIRST], true, 0 -(50) AdaptiveSparkPlan +(53) AdaptiveSparkPlan Output [2]: [o_orderpriority#X, order_count#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/5.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/5.txt index aff82bdec961..307021bed481 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/5.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/5.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (134) +AdaptiveSparkPlan (149) +- == Final Plan == VeloxColumnarToRowExec (94) +- ^ SortExecTransformer (92) @@ -71,45 +71,60 @@ AdaptiveSparkPlan (134) +- ^ FilterExecTransformer (70) +- ^ Scan parquet (69) +- == Initial Plan == - Sort (133) - +- Exchange (132) - +- HashAggregate (131) - +- Exchange (130) - +- HashAggregate (129) - +- Project (128) - +- ShuffledHashJoin Inner BuildRight (127) - :- Exchange (122) - : +- Project (121) - : +- ShuffledHashJoin Inner BuildRight (120) - : :- Exchange (116) - : : +- Project (115) - : : +- ShuffledHashJoin Inner BuildRight (114) - : : :- Exchange (110) - : : : +- Project (109) - : : : +- ShuffledHashJoin Inner BuildRight (108) - : : : :- Exchange (104) - : : : : +- Project (103) - : : : : +- ShuffledHashJoin Inner BuildLeft (102) - : : : : :- Exchange (97) - : : : : : +- Filter (96) - : : : : : +- Scan parquet (95) - : : : : +- Exchange (101) - : : : : +- Project (100) - : : : : +- Filter (99) - : : : : +- Scan parquet (98) - : : : +- Exchange (107) - : : : +- Filter (106) - : : : +- Scan parquet (105) - : : +- Exchange (113) - : : +- Filter (112) - : : +- Scan parquet (111) - : +- Exchange (119) - : +- Filter (118) - : +- Scan parquet (117) - +- Exchange (126) - +- Project (125) - +- Filter (124) - +- Scan parquet (123) + Sort (148) + +- Exchange (147) + +- HashAggregate (146) + +- Exchange (145) + +- HashAggregate (144) + +- Project (143) + +- ShuffledHashJoinExecTemp (142) + +- SortMergeJoin Inner (141) + :- Sort (135) + : +- Exchange (134) + : +- Project (133) + : +- ShuffledHashJoinExecTemp (132) + : +- SortMergeJoin Inner (131) + : :- Sort (126) + : : +- Exchange (125) + : : +- Project (124) + : : +- ShuffledHashJoinExecTemp (123) + : : +- SortMergeJoin Inner (122) + : : :- Sort (117) + : : : +- Exchange (116) + : : : +- Project (115) + : : : +- ShuffledHashJoinExecTemp (114) + : : : +- SortMergeJoin Inner (113) + : : : :- Sort (108) + : : : : +- Exchange (107) + : : : : +- Project (106) + : : : : +- ShuffledHashJoinExecTemp (105) + : : : : +- SortMergeJoin Inner (104) + : : : : :- Sort (98) + : : : : : +- Exchange (97) + : : : : : +- Filter (96) + : : : : : +- Scan parquet (95) + : : : : +- Sort (103) + : : : : +- Exchange (102) + : : : : +- Project (101) + : : : : +- Filter (100) + : : : : +- Scan parquet (99) + : : : +- Sort (112) + : : : +- Exchange (111) + : : : +- Filter (110) + : : : +- Scan parquet (109) + : : +- Sort (121) + : : +- Exchange (120) + : : +- Filter (119) + : : +- Scan parquet (118) + : +- Sort (130) + : +- Exchange (129) + : +- Filter (128) + : +- Scan parquet (127) + +- Sort (140) + +- Exchange (139) + +- Project (138) + +- Filter (137) + +- Scan parquet (136) (1) Scan parquet @@ -507,176 +522,236 @@ Condition : (isnotnull(c_custkey#X) AND isnotnull(c_nationkey#X)) Input [2]: [c_custkey#X, c_nationkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(98) Scan parquet +(98) Sort +Input [2]: [c_custkey#X, c_nationkey#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(99) Scan parquet Output [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderdate), GreaterThanOrEqual(o_orderdate,1994-01-01), LessThan(o_orderdate,1995-01-01), IsNotNull(o_custkey), IsNotNull(o_orderkey)] ReadSchema: struct -(99) Filter +(100) Filter Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Condition : ((((isnotnull(o_orderdate#X) AND (o_orderdate#X >= 1994-01-01)) AND (o_orderdate#X < 1995-01-01)) AND isnotnull(o_custkey#X)) AND isnotnull(o_orderkey#X)) -(100) Project +(101) Project Output [2]: [o_orderkey#X, o_custkey#X] Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] -(101) Exchange +(102) Exchange Input [2]: [o_orderkey#X, o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(102) ShuffledHashJoin +(103) Sort +Input [2]: [o_orderkey#X, o_custkey#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(104) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join condition: None -(103) Project +(105) ShuffledHashJoinExecTemp +Input [4]: [c_custkey#X, c_nationkey#X, o_orderkey#X, o_custkey#X] +Arguments: BuildLeft + +(106) Project Output [2]: [c_nationkey#X, o_orderkey#X] Input [4]: [c_custkey#X, c_nationkey#X, o_orderkey#X, o_custkey#X] -(104) Exchange +(107) Exchange Input [2]: [c_nationkey#X, o_orderkey#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(105) Scan parquet +(108) Sort +Input [2]: [c_nationkey#X, o_orderkey#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(109) Scan parquet Output [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_orderkey), IsNotNull(l_suppkey)] ReadSchema: struct -(106) Filter +(110) Filter Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Condition : (isnotnull(l_orderkey#X) AND isnotnull(l_suppkey#X)) -(107) Exchange +(111) Exchange Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(108) ShuffledHashJoin +(112) Sort +Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(113) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(109) Project +(114) ShuffledHashJoinExecTemp +Input [6]: [c_nationkey#X, o_orderkey#X, l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: BuildRight + +(115) Project Output [4]: [c_nationkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Input [6]: [c_nationkey#X, o_orderkey#X, l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] -(110) Exchange +(116) Exchange Input [4]: [c_nationkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_suppkey#X, c_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(111) Scan parquet +(117) Sort +Input [4]: [c_nationkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_suppkey#X ASC NULLS FIRST, c_nationkey#X ASC NULLS FIRST], false, 0 + +(118) Scan parquet Output [2]: [s_suppkey#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct -(112) Filter +(119) Filter Input [2]: [s_suppkey#X, s_nationkey#X] Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) -(113) Exchange +(120) Exchange Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(114) ShuffledHashJoin +(121) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST, s_nationkey#X ASC NULLS FIRST], false, 0 + +(122) SortMergeJoin Left keys [2]: [l_suppkey#X, c_nationkey#X] Right keys [2]: [s_suppkey#X, s_nationkey#X] Join condition: None -(115) Project +(123) ShuffledHashJoinExecTemp +Input [6]: [c_nationkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, s_suppkey#X, s_nationkey#X] +Arguments: BuildRight + +(124) Project Output [3]: [l_extendedprice#X, l_discount#X, s_nationkey#X] Input [6]: [c_nationkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, s_suppkey#X, s_nationkey#X] -(116) Exchange +(125) Exchange Input [3]: [l_extendedprice#X, l_discount#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(117) Scan parquet +(126) Sort +Input [3]: [l_extendedprice#X, l_discount#X, s_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(127) Scan parquet Output [3]: [n_nationkey#X, n_name#X, n_regionkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey), IsNotNull(n_regionkey)] ReadSchema: struct -(118) Filter +(128) Filter Input [3]: [n_nationkey#X, n_name#X, n_regionkey#X] Condition : (isnotnull(n_nationkey#X) AND isnotnull(n_regionkey#X)) -(119) Exchange +(129) Exchange Input [3]: [n_nationkey#X, n_name#X, n_regionkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(120) ShuffledHashJoin +(130) Sort +Input [3]: [n_nationkey#X, n_name#X, n_regionkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(131) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(121) Project +(132) ShuffledHashJoinExecTemp +Input [6]: [l_extendedprice#X, l_discount#X, s_nationkey#X, n_nationkey#X, n_name#X, n_regionkey#X] +Arguments: BuildRight + +(133) Project Output [4]: [l_extendedprice#X, l_discount#X, n_name#X, n_regionkey#X] Input [6]: [l_extendedprice#X, l_discount#X, s_nationkey#X, n_nationkey#X, n_name#X, n_regionkey#X] -(122) Exchange +(134) Exchange Input [4]: [l_extendedprice#X, l_discount#X, n_name#X, n_regionkey#X] Arguments: hashpartitioning(n_regionkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(123) Scan parquet +(135) Sort +Input [4]: [l_extendedprice#X, l_discount#X, n_name#X, n_regionkey#X] +Arguments: [n_regionkey#X ASC NULLS FIRST], false, 0 + +(136) Scan parquet Output [2]: [r_regionkey#X, r_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(r_name), EqualTo(r_name,ASIA), IsNotNull(r_regionkey)] ReadSchema: struct -(124) Filter +(137) Filter Input [2]: [r_regionkey#X, r_name#X] Condition : ((isnotnull(r_name#X) AND (r_name#X = ASIA)) AND isnotnull(r_regionkey#X)) -(125) Project +(138) Project Output [1]: [r_regionkey#X] Input [2]: [r_regionkey#X, r_name#X] -(126) Exchange +(139) Exchange Input [1]: [r_regionkey#X] Arguments: hashpartitioning(r_regionkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(127) ShuffledHashJoin +(140) Sort +Input [1]: [r_regionkey#X] +Arguments: [r_regionkey#X ASC NULLS FIRST], false, 0 + +(141) SortMergeJoin Left keys [1]: [n_regionkey#X] Right keys [1]: [r_regionkey#X] Join condition: None -(128) Project +(142) ShuffledHashJoinExecTemp +Input [5]: [l_extendedprice#X, l_discount#X, n_name#X, n_regionkey#X, r_regionkey#X] +Arguments: BuildRight + +(143) Project Output [3]: [l_extendedprice#X, l_discount#X, n_name#X] Input [5]: [l_extendedprice#X, l_discount#X, n_name#X, n_regionkey#X, r_regionkey#X] -(129) HashAggregate +(144) HashAggregate Input [3]: [l_extendedprice#X, l_discount#X, n_name#X] Keys [1]: [n_name#X] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [n_name#X, sum#X, isEmpty#X] -(130) Exchange +(145) Exchange Input [3]: [n_name#X, sum#X, isEmpty#X] Arguments: hashpartitioning(n_name#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(131) HashAggregate +(146) HashAggregate Input [3]: [n_name#X, sum#X, isEmpty#X] Keys [1]: [n_name#X] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X] Results [2]: [n_name#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X AS revenue#X] -(132) Exchange +(147) Exchange Input [2]: [n_name#X, revenue#X] Arguments: rangepartitioning(revenue#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(133) Sort +(148) Sort Input [2]: [n_name#X, revenue#X] Arguments: [revenue#X DESC NULLS LAST], true, 0 -(134) AdaptiveSparkPlan +(149) AdaptiveSparkPlan Output [2]: [n_name#X, revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/7.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/7.txt index f0650d65e2cf..32634eb4c6e3 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/7.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/7.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (128) +AdaptiveSparkPlan (143) +- == Final Plan == VeloxColumnarToRowExec (90) +- ^ SortExecTransformer (88) @@ -68,43 +68,58 @@ AdaptiveSparkPlan (128) +- ShuffleQueryStage (70) +- ReusedExchange (69) +- == Initial Plan == - Sort (127) - +- Exchange (126) - +- HashAggregate (125) - +- Exchange (124) - +- HashAggregate (123) - +- Project (122) - +- ShuffledHashJoin Inner BuildRight (121) - :- Exchange (117) - : +- Project (116) - : +- ShuffledHashJoin Inner BuildRight (115) - : :- Exchange (111) - : : +- Project (110) - : : +- ShuffledHashJoin Inner BuildRight (109) - : : :- Exchange (105) - : : : +- Project (104) - : : : +- ShuffledHashJoin Inner BuildRight (103) - : : : :- Exchange (99) - : : : : +- Project (98) - : : : : +- ShuffledHashJoin Inner BuildLeft (97) - : : : : :- Exchange (93) - : : : : : +- Filter (92) - : : : : : +- Scan parquet (91) - : : : : +- Exchange (96) - : : : : +- Filter (95) - : : : : +- Scan parquet (94) - : : : +- Exchange (102) - : : : +- Filter (101) - : : : +- Scan parquet (100) - : : +- Exchange (108) - : : +- Filter (107) - : : +- Scan parquet (106) - : +- Exchange (114) - : +- Filter (113) - : +- Scan parquet (112) - +- Exchange (120) - +- Filter (119) - +- Scan parquet (118) + Sort (142) + +- Exchange (141) + +- HashAggregate (140) + +- Exchange (139) + +- HashAggregate (138) + +- Project (137) + +- ShuffledHashJoinExecTemp (136) + +- SortMergeJoin Inner (135) + :- Sort (130) + : +- Exchange (129) + : +- Project (128) + : +- ShuffledHashJoinExecTemp (127) + : +- SortMergeJoin Inner (126) + : :- Sort (121) + : : +- Exchange (120) + : : +- Project (119) + : : +- ShuffledHashJoinExecTemp (118) + : : +- SortMergeJoin Inner (117) + : : :- Sort (112) + : : : +- Exchange (111) + : : : +- Project (110) + : : : +- ShuffledHashJoinExecTemp (109) + : : : +- SortMergeJoin Inner (108) + : : : :- Sort (103) + : : : : +- Exchange (102) + : : : : +- Project (101) + : : : : +- ShuffledHashJoinExecTemp (100) + : : : : +- SortMergeJoin Inner (99) + : : : : :- Sort (94) + : : : : : +- Exchange (93) + : : : : : +- Filter (92) + : : : : : +- Scan parquet (91) + : : : : +- Sort (98) + : : : : +- Exchange (97) + : : : : +- Filter (96) + : : : : +- Scan parquet (95) + : : : +- Sort (107) + : : : +- Exchange (106) + : : : +- Filter (105) + : : : +- Scan parquet (104) + : : +- Sort (116) + : : +- Exchange (115) + : : +- Filter (114) + : : +- Scan parquet (113) + : +- Sort (125) + : +- Exchange (124) + : +- Filter (123) + : +- Scan parquet (122) + +- Sort (134) + +- Exchange (133) + +- Filter (132) + +- Scan parquet (131) (1) Scan parquet @@ -482,168 +497,228 @@ Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(94) Scan parquet +(94) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(95) Scan parquet Output [5]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1995-01-01), LessThanOrEqual(l_shipdate,1996-12-31), IsNotNull(l_suppkey), IsNotNull(l_orderkey)] ReadSchema: struct -(95) Filter +(96) Filter Input [5]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : ((((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1995-01-01)) AND (l_shipdate#X <= 1996-12-31)) AND isnotnull(l_suppkey#X)) AND isnotnull(l_orderkey#X)) -(96) Exchange +(97) Exchange Input [5]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(97) ShuffledHashJoin +(98) Sort +Input [5]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Arguments: [l_suppkey#X ASC NULLS FIRST], false, 0 + +(99) SortMergeJoin Left keys [1]: [s_suppkey#X] Right keys [1]: [l_suppkey#X] Join condition: None -(98) Project +(100) ShuffledHashJoinExecTemp +Input [7]: [s_suppkey#X, s_nationkey#X, l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Arguments: BuildLeft + +(101) Project Output [5]: [s_nationkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Input [7]: [s_suppkey#X, s_nationkey#X, l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(99) Exchange +(102) Exchange Input [5]: [s_nationkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(100) Scan parquet +(103) Sort +Input [5]: [s_nationkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(104) Scan parquet Output [2]: [o_orderkey#X, o_custkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderkey), IsNotNull(o_custkey)] ReadSchema: struct -(101) Filter +(105) Filter Input [2]: [o_orderkey#X, o_custkey#X] Condition : (isnotnull(o_orderkey#X) AND isnotnull(o_custkey#X)) -(102) Exchange +(106) Exchange Input [2]: [o_orderkey#X, o_custkey#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(103) ShuffledHashJoin +(107) Sort +Input [2]: [o_orderkey#X, o_custkey#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(108) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [o_orderkey#X] Join condition: None -(104) Project +(109) ShuffledHashJoinExecTemp +Input [7]: [s_nationkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_orderkey#X, o_custkey#X] +Arguments: BuildRight + +(110) Project Output [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_custkey#X] Input [7]: [s_nationkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_orderkey#X, o_custkey#X] -(105) Exchange +(111) Exchange Input [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(106) Scan parquet +(112) Sort +Input [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_custkey#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(113) Scan parquet Output [2]: [c_custkey#X, c_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(c_custkey), IsNotNull(c_nationkey)] ReadSchema: struct -(107) Filter +(114) Filter Input [2]: [c_custkey#X, c_nationkey#X] Condition : (isnotnull(c_custkey#X) AND isnotnull(c_nationkey#X)) -(108) Exchange +(115) Exchange Input [2]: [c_custkey#X, c_nationkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(109) ShuffledHashJoin +(116) Sort +Input [2]: [c_custkey#X, c_nationkey#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(117) SortMergeJoin Left keys [1]: [o_custkey#X] Right keys [1]: [c_custkey#X] Join condition: None -(110) Project +(118) ShuffledHashJoinExecTemp +Input [7]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_custkey#X, c_custkey#X, c_nationkey#X] +Arguments: BuildRight + +(119) Project Output [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X] Input [7]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_custkey#X, c_custkey#X, c_nationkey#X] -(111) Exchange +(120) Exchange Input [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(112) Scan parquet +(121) Sort +Input [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(122) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey), Or(EqualTo(n_name,FRANCE),EqualTo(n_name,GERMANY))] ReadSchema: struct -(113) Filter +(123) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : (isnotnull(n_nationkey#X) AND ((n_name#X = FRANCE) OR (n_name#X = GERMANY))) -(114) Exchange +(124) Exchange Input [2]: [n_nationkey#X, n_name#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(115) ShuffledHashJoin +(125) Sort +Input [2]: [n_nationkey#X, n_name#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(126) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(116) Project +(127) ShuffledHashJoinExecTemp +Input [7]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_nationkey#X, n_name#X] +Arguments: BuildRight + +(128) Project Output [5]: [l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_name#X] Input [7]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_nationkey#X, n_name#X] -(117) Exchange +(129) Exchange Input [5]: [l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_name#X] Arguments: hashpartitioning(c_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(118) Scan parquet +(130) Sort +Input [5]: [l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_name#X] +Arguments: [c_nationkey#X ASC NULLS FIRST], false, 0 + +(131) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey), Or(EqualTo(n_name,GERMANY),EqualTo(n_name,FRANCE))] ReadSchema: struct -(119) Filter +(132) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : (isnotnull(n_nationkey#X) AND ((n_name#X = GERMANY) OR (n_name#X = FRANCE))) -(120) Exchange +(133) Exchange Input [2]: [n_nationkey#X, n_name#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(121) ShuffledHashJoin +(134) Sort +Input [2]: [n_nationkey#X, n_name#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(135) SortMergeJoin Left keys [1]: [c_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: (((n_name#X = FRANCE) AND (n_name#X = GERMANY)) OR ((n_name#X = GERMANY) AND (n_name#X = FRANCE))) -(122) Project +(136) ShuffledHashJoinExecTemp +Input [7]: [l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_name#X, n_nationkey#X, n_name#X] +Arguments: BuildRight + +(137) Project Output [4]: [n_name#X AS supp_nation#X, n_name#X AS cust_nation#X, year(l_shipdate#X) AS l_year#X, CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true) AS volume#X] Input [7]: [l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_name#X, n_nationkey#X, n_name#X] -(123) HashAggregate +(138) HashAggregate Input [4]: [supp_nation#X, cust_nation#X, l_year#X, volume#X] Keys [3]: [supp_nation#X, cust_nation#X, l_year#X] Functions [1]: [partial_sum(volume#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [5]: [supp_nation#X, cust_nation#X, l_year#X, sum#X, isEmpty#X] -(124) Exchange +(139) Exchange Input [5]: [supp_nation#X, cust_nation#X, l_year#X, sum#X, isEmpty#X] Arguments: hashpartitioning(supp_nation#X, cust_nation#X, l_year#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(125) HashAggregate +(140) HashAggregate Input [5]: [supp_nation#X, cust_nation#X, l_year#X, sum#X, isEmpty#X] Keys [3]: [supp_nation#X, cust_nation#X, l_year#X] Functions [1]: [sum(volume#X)] Aggregate Attributes [1]: [sum(volume#X)#X] Results [4]: [supp_nation#X, cust_nation#X, l_year#X, sum(volume#X)#X AS revenue#X] -(126) Exchange +(141) Exchange Input [4]: [supp_nation#X, cust_nation#X, l_year#X, revenue#X] Arguments: rangepartitioning(supp_nation#X ASC NULLS FIRST, cust_nation#X ASC NULLS FIRST, l_year#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(127) Sort +(142) Sort Input [4]: [supp_nation#X, cust_nation#X, l_year#X, revenue#X] Arguments: [supp_nation#X ASC NULLS FIRST, cust_nation#X ASC NULLS FIRST, l_year#X ASC NULLS FIRST], true, 0 -(128) AdaptiveSparkPlan +(143) AdaptiveSparkPlan Output [4]: [supp_nation#X, cust_nation#X, l_year#X, revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/8.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/8.txt index e2a1907549f0..e6845a4e0f2a 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/8.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/8.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (177) +AdaptiveSparkPlan (198) +- == Final Plan == VeloxColumnarToRowExec (125) +- ^ SortExecTransformer (123) @@ -94,57 +94,78 @@ AdaptiveSparkPlan (177) +- ^ FilterExecTransformer (100) +- ^ Scan parquet (99) +- == Initial Plan == - Sort (176) - +- Exchange (175) - +- HashAggregate (174) - +- Exchange (173) - +- HashAggregate (172) - +- Project (171) - +- ShuffledHashJoin Inner BuildRight (170) - :- Exchange (165) - : +- Project (164) - : +- ShuffledHashJoin Inner BuildRight (163) - : :- Exchange (159) - : : +- Project (158) - : : +- ShuffledHashJoin Inner BuildRight (157) - : : :- Exchange (153) - : : : +- Project (152) - : : : +- ShuffledHashJoin Inner BuildRight (151) - : : : :- Exchange (147) - : : : : +- Project (146) - : : : : +- ShuffledHashJoin Inner BuildRight (145) - : : : : :- Exchange (141) - : : : : : +- Project (140) - : : : : : +- ShuffledHashJoin Inner BuildRight (139) - : : : : : :- Exchange (135) - : : : : : : +- Project (134) - : : : : : : +- ShuffledHashJoin Inner BuildLeft (133) - : : : : : : :- Exchange (129) - : : : : : : : +- Project (128) - : : : : : : : +- Filter (127) - : : : : : : : +- Scan parquet (126) - : : : : : : +- Exchange (132) - : : : : : : +- Filter (131) - : : : : : : +- Scan parquet (130) - : : : : : +- Exchange (138) - : : : : : +- Filter (137) - : : : : : +- Scan parquet (136) - : : : : +- Exchange (144) - : : : : +- Filter (143) - : : : : +- Scan parquet (142) - : : : +- Exchange (150) - : : : +- Filter (149) - : : : +- Scan parquet (148) - : : +- Exchange (156) - : : +- Filter (155) - : : +- Scan parquet (154) - : +- Exchange (162) - : +- Filter (161) - : +- Scan parquet (160) - +- Exchange (169) - +- Project (168) - +- Filter (167) - +- Scan parquet (166) + Sort (197) + +- Exchange (196) + +- HashAggregate (195) + +- Exchange (194) + +- HashAggregate (193) + +- Project (192) + +- ShuffledHashJoinExecTemp (191) + +- SortMergeJoin Inner (190) + :- Sort (184) + : +- Exchange (183) + : +- Project (182) + : +- ShuffledHashJoinExecTemp (181) + : +- SortMergeJoin Inner (180) + : :- Sort (175) + : : +- Exchange (174) + : : +- Project (173) + : : +- ShuffledHashJoinExecTemp (172) + : : +- SortMergeJoin Inner (171) + : : :- Sort (166) + : : : +- Exchange (165) + : : : +- Project (164) + : : : +- ShuffledHashJoinExecTemp (163) + : : : +- SortMergeJoin Inner (162) + : : : :- Sort (157) + : : : : +- Exchange (156) + : : : : +- Project (155) + : : : : +- ShuffledHashJoinExecTemp (154) + : : : : +- SortMergeJoin Inner (153) + : : : : :- Sort (148) + : : : : : +- Exchange (147) + : : : : : +- Project (146) + : : : : : +- ShuffledHashJoinExecTemp (145) + : : : : : +- SortMergeJoin Inner (144) + : : : : : :- Sort (139) + : : : : : : +- Exchange (138) + : : : : : : +- Project (137) + : : : : : : +- ShuffledHashJoinExecTemp (136) + : : : : : : +- SortMergeJoin Inner (135) + : : : : : : :- Sort (130) + : : : : : : : +- Exchange (129) + : : : : : : : +- Project (128) + : : : : : : : +- Filter (127) + : : : : : : : +- Scan parquet (126) + : : : : : : +- Sort (134) + : : : : : : +- Exchange (133) + : : : : : : +- Filter (132) + : : : : : : +- Scan parquet (131) + : : : : : +- Sort (143) + : : : : : +- Exchange (142) + : : : : : +- Filter (141) + : : : : : +- Scan parquet (140) + : : : : +- Sort (152) + : : : : +- Exchange (151) + : : : : +- Filter (150) + : : : : +- Scan parquet (149) + : : : +- Sort (161) + : : : +- Exchange (160) + : : : +- Filter (159) + : : : +- Scan parquet (158) + : : +- Sort (170) + : : +- Exchange (169) + : : +- Filter (168) + : : +- Scan parquet (167) + : +- Sort (179) + : +- Exchange (178) + : +- Filter (177) + : +- Scan parquet (176) + +- Sort (189) + +- Exchange (188) + +- Project (187) + +- Filter (186) + +- Scan parquet (185) (1) Scan parquet @@ -670,228 +691,312 @@ Input [2]: [p_partkey#X, p_type#X] Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(130) Scan parquet +(130) Sort +Input [1]: [p_partkey#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(131) Scan parquet Output [5]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_partkey), IsNotNull(l_suppkey), IsNotNull(l_orderkey)] ReadSchema: struct -(131) Filter +(132) Filter Input [5]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Condition : ((isnotnull(l_partkey#X) AND isnotnull(l_suppkey#X)) AND isnotnull(l_orderkey#X)) -(132) Exchange +(133) Exchange Input [5]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(133) ShuffledHashJoin +(134) Sort +Input [5]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(135) SortMergeJoin Left keys [1]: [p_partkey#X] Right keys [1]: [l_partkey#X] Join condition: None -(134) Project +(136) ShuffledHashJoinExecTemp +Input [6]: [p_partkey#X, l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: BuildLeft + +(137) Project Output [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Input [6]: [p_partkey#X, l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] -(135) Exchange +(138) Exchange Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(136) Scan parquet +(139) Sort +Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_suppkey#X ASC NULLS FIRST], false, 0 + +(140) Scan parquet Output [2]: [s_suppkey#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct -(137) Filter +(141) Filter Input [2]: [s_suppkey#X, s_nationkey#X] Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) -(138) Exchange +(142) Exchange Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(139) ShuffledHashJoin +(143) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(144) SortMergeJoin Left keys [1]: [l_suppkey#X] Right keys [1]: [s_suppkey#X] Join condition: None -(140) Project +(145) ShuffledHashJoinExecTemp +Input [6]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, s_suppkey#X, s_nationkey#X] +Arguments: BuildRight + +(146) Project Output [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, s_nationkey#X] Input [6]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, s_suppkey#X, s_nationkey#X] -(141) Exchange +(147) Exchange Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, s_nationkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(142) Scan parquet +(148) Sort +Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, s_nationkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(149) Scan parquet Output [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderdate), GreaterThanOrEqual(o_orderdate,1995-01-01), LessThanOrEqual(o_orderdate,1996-12-31), IsNotNull(o_orderkey), IsNotNull(o_custkey)] ReadSchema: struct -(143) Filter +(150) Filter Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Condition : ((((isnotnull(o_orderdate#X) AND (o_orderdate#X >= 1995-01-01)) AND (o_orderdate#X <= 1996-12-31)) AND isnotnull(o_orderkey#X)) AND isnotnull(o_custkey#X)) -(144) Exchange +(151) Exchange Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(145) ShuffledHashJoin +(152) Sort +Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(153) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [o_orderkey#X] Join condition: None -(146) Project +(154) ShuffledHashJoinExecTemp +Input [7]: [l_orderkey#X, l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderkey#X, o_custkey#X, o_orderdate#X] +Arguments: BuildRight + +(155) Project Output [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_custkey#X, o_orderdate#X] Input [7]: [l_orderkey#X, l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderkey#X, o_custkey#X, o_orderdate#X] -(147) Exchange +(156) Exchange Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_custkey#X, o_orderdate#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(148) Scan parquet +(157) Sort +Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_custkey#X, o_orderdate#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(158) Scan parquet Output [2]: [c_custkey#X, c_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(c_custkey), IsNotNull(c_nationkey)] ReadSchema: struct -(149) Filter +(159) Filter Input [2]: [c_custkey#X, c_nationkey#X] Condition : (isnotnull(c_custkey#X) AND isnotnull(c_nationkey#X)) -(150) Exchange +(160) Exchange Input [2]: [c_custkey#X, c_nationkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(151) ShuffledHashJoin +(161) Sort +Input [2]: [c_custkey#X, c_nationkey#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(162) SortMergeJoin Left keys [1]: [o_custkey#X] Right keys [1]: [c_custkey#X] Join condition: None -(152) Project +(163) ShuffledHashJoinExecTemp +Input [7]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_custkey#X, o_orderdate#X, c_custkey#X, c_nationkey#X] +Arguments: BuildRight + +(164) Project Output [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, c_nationkey#X] Input [7]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_custkey#X, o_orderdate#X, c_custkey#X, c_nationkey#X] -(153) Exchange +(165) Exchange Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, c_nationkey#X] Arguments: hashpartitioning(c_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(154) Scan parquet +(166) Sort +Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, c_nationkey#X] +Arguments: [c_nationkey#X ASC NULLS FIRST], false, 0 + +(167) Scan parquet Output [2]: [n_nationkey#X, n_regionkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey), IsNotNull(n_regionkey)] ReadSchema: struct -(155) Filter +(168) Filter Input [2]: [n_nationkey#X, n_regionkey#X] Condition : (isnotnull(n_nationkey#X) AND isnotnull(n_regionkey#X)) -(156) Exchange +(169) Exchange Input [2]: [n_nationkey#X, n_regionkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(157) ShuffledHashJoin +(170) Sort +Input [2]: [n_nationkey#X, n_regionkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(171) SortMergeJoin Left keys [1]: [c_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(158) Project +(172) ShuffledHashJoinExecTemp +Input [7]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, c_nationkey#X, n_nationkey#X, n_regionkey#X] +Arguments: BuildRight + +(173) Project Output [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, n_regionkey#X] Input [7]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, c_nationkey#X, n_nationkey#X, n_regionkey#X] -(159) Exchange +(174) Exchange Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, n_regionkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(160) Scan parquet +(175) Sort +Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, n_regionkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(176) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey)] ReadSchema: struct -(161) Filter +(177) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : isnotnull(n_nationkey#X) -(162) Exchange +(178) Exchange Input [2]: [n_nationkey#X, n_name#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(163) ShuffledHashJoin +(179) Sort +Input [2]: [n_nationkey#X, n_name#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(180) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(164) Project +(181) ShuffledHashJoinExecTemp +Input [7]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, n_regionkey#X, n_nationkey#X, n_name#X] +Arguments: BuildRight + +(182) Project Output [5]: [l_extendedprice#X, l_discount#X, o_orderdate#X, n_regionkey#X, n_name#X] Input [7]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, n_regionkey#X, n_nationkey#X, n_name#X] -(165) Exchange +(183) Exchange Input [5]: [l_extendedprice#X, l_discount#X, o_orderdate#X, n_regionkey#X, n_name#X] Arguments: hashpartitioning(n_regionkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(166) Scan parquet +(184) Sort +Input [5]: [l_extendedprice#X, l_discount#X, o_orderdate#X, n_regionkey#X, n_name#X] +Arguments: [n_regionkey#X ASC NULLS FIRST], false, 0 + +(185) Scan parquet Output [2]: [r_regionkey#X, r_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(r_name), EqualTo(r_name,AMERICA), IsNotNull(r_regionkey)] ReadSchema: struct -(167) Filter +(186) Filter Input [2]: [r_regionkey#X, r_name#X] Condition : ((isnotnull(r_name#X) AND (r_name#X = AMERICA)) AND isnotnull(r_regionkey#X)) -(168) Project +(187) Project Output [1]: [r_regionkey#X] Input [2]: [r_regionkey#X, r_name#X] -(169) Exchange +(188) Exchange Input [1]: [r_regionkey#X] Arguments: hashpartitioning(r_regionkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(170) ShuffledHashJoin +(189) Sort +Input [1]: [r_regionkey#X] +Arguments: [r_regionkey#X ASC NULLS FIRST], false, 0 + +(190) SortMergeJoin Left keys [1]: [n_regionkey#X] Right keys [1]: [r_regionkey#X] Join condition: None -(171) Project +(191) ShuffledHashJoinExecTemp +Input [6]: [l_extendedprice#X, l_discount#X, o_orderdate#X, n_regionkey#X, n_name#X, r_regionkey#X] +Arguments: BuildRight + +(192) Project Output [3]: [year(o_orderdate#X) AS o_year#X, CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true) AS volume#X, n_name#X AS nation#X] Input [6]: [l_extendedprice#X, l_discount#X, o_orderdate#X, n_regionkey#X, n_name#X, r_regionkey#X] -(172) HashAggregate +(193) HashAggregate Input [3]: [o_year#X, volume#X, nation#X] Keys [1]: [o_year#X] Functions [2]: [partial_sum(CASE WHEN (nation#X = BRAZIL) THEN volume#X ELSE 0.0000 END), partial_sum(volume#X)] Aggregate Attributes [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] Results [5]: [o_year#X, sum#X, isEmpty#X, sum#X, isEmpty#X] -(173) Exchange +(194) Exchange Input [5]: [o_year#X, sum#X, isEmpty#X, sum#X, isEmpty#X] Arguments: hashpartitioning(o_year#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(174) HashAggregate +(195) HashAggregate Input [5]: [o_year#X, sum#X, isEmpty#X, sum#X, isEmpty#X] Keys [1]: [o_year#X] Functions [2]: [sum(CASE WHEN (nation#X = BRAZIL) THEN volume#X ELSE 0.0000 END), sum(volume#X)] Aggregate Attributes [2]: [sum(CASE WHEN (nation#X = BRAZIL) THEN volume#X ELSE 0.0000 END)#X, sum(volume#X)#X] Results [2]: [o_year#X, CheckOverflow((promote_precision(sum(CASE WHEN (nation#X = BRAZIL) THEN volume#X ELSE 0.0000 END)#X) / promote_precision(sum(volume#X)#X)), DecimalType(38,6), true) AS mkt_share#X] -(175) Exchange +(196) Exchange Input [2]: [o_year#X, mkt_share#X] Arguments: rangepartitioning(o_year#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(176) Sort +(197) Sort Input [2]: [o_year#X, mkt_share#X] Arguments: [o_year#X ASC NULLS FIRST], true, 0 -(177) AdaptiveSparkPlan +(198) AdaptiveSparkPlan Output [2]: [o_year#X, mkt_share#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/9.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/9.txt index b86e86d560e5..a3848b2eab02 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/9.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/9.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (133) +AdaptiveSparkPlan (148) +- == Final Plan == VeloxColumnarToRowExec (94) +- ^ SortExecTransformer (92) @@ -71,44 +71,59 @@ AdaptiveSparkPlan (133) +- ^ FilterExecTransformer (70) +- ^ Scan parquet (69) +- == Initial Plan == - Sort (132) - +- Exchange (131) - +- HashAggregate (130) - +- Exchange (129) - +- HashAggregate (128) - +- Project (127) - +- ShuffledHashJoin Inner BuildRight (126) - :- Exchange (122) - : +- Project (121) - : +- ShuffledHashJoin Inner BuildRight (120) - : :- Exchange (116) - : : +- Project (115) - : : +- ShuffledHashJoin Inner BuildRight (114) - : : :- Exchange (110) - : : : +- Project (109) - : : : +- ShuffledHashJoin Inner BuildRight (108) - : : : :- Exchange (104) - : : : : +- Project (103) - : : : : +- ShuffledHashJoin Inner BuildLeft (102) - : : : : :- Exchange (98) - : : : : : +- Project (97) - : : : : : +- Filter (96) - : : : : : +- Scan parquet (95) - : : : : +- Exchange (101) - : : : : +- Filter (100) - : : : : +- Scan parquet (99) - : : : +- Exchange (107) - : : : +- Filter (106) - : : : +- Scan parquet (105) - : : +- Exchange (113) - : : +- Filter (112) - : : +- Scan parquet (111) - : +- Exchange (119) - : +- Filter (118) - : +- Scan parquet (117) - +- Exchange (125) - +- Filter (124) - +- Scan parquet (123) + Sort (147) + +- Exchange (146) + +- HashAggregate (145) + +- Exchange (144) + +- HashAggregate (143) + +- Project (142) + +- ShuffledHashJoinExecTemp (141) + +- SortMergeJoin Inner (140) + :- Sort (135) + : +- Exchange (134) + : +- Project (133) + : +- ShuffledHashJoinExecTemp (132) + : +- SortMergeJoin Inner (131) + : :- Sort (126) + : : +- Exchange (125) + : : +- Project (124) + : : +- ShuffledHashJoinExecTemp (123) + : : +- SortMergeJoin Inner (122) + : : :- Sort (117) + : : : +- Exchange (116) + : : : +- Project (115) + : : : +- ShuffledHashJoinExecTemp (114) + : : : +- SortMergeJoin Inner (113) + : : : :- Sort (108) + : : : : +- Exchange (107) + : : : : +- Project (106) + : : : : +- ShuffledHashJoinExecTemp (105) + : : : : +- SortMergeJoin Inner (104) + : : : : :- Sort (99) + : : : : : +- Exchange (98) + : : : : : +- Project (97) + : : : : : +- Filter (96) + : : : : : +- Scan parquet (95) + : : : : +- Sort (103) + : : : : +- Exchange (102) + : : : : +- Filter (101) + : : : : +- Scan parquet (100) + : : : +- Sort (112) + : : : +- Exchange (111) + : : : +- Filter (110) + : : : +- Scan parquet (109) + : : +- Sort (121) + : : +- Exchange (120) + : : +- Filter (119) + : : +- Scan parquet (118) + : +- Sort (130) + : +- Exchange (129) + : +- Filter (128) + : +- Scan parquet (127) + +- Sort (139) + +- Exchange (138) + +- Filter (137) + +- Scan parquet (136) (1) Scan parquet @@ -510,168 +525,228 @@ Input [2]: [p_partkey#X, p_name#X] Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(99) Scan parquet +(99) Sort +Input [1]: [p_partkey#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(100) Scan parquet Output [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_partkey), IsNotNull(l_suppkey), IsNotNull(l_orderkey)] ReadSchema: struct -(100) Filter +(101) Filter Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Condition : ((isnotnull(l_partkey#X) AND isnotnull(l_suppkey#X)) AND isnotnull(l_orderkey#X)) -(101) Exchange +(102) Exchange Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(102) ShuffledHashJoin +(103) Sort +Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(104) SortMergeJoin Left keys [1]: [p_partkey#X] Right keys [1]: [l_partkey#X] Join condition: None -(103) Project +(105) ShuffledHashJoinExecTemp +Input [7]: [p_partkey#X, l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] +Arguments: BuildLeft + +(106) Project Output [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Input [7]: [p_partkey#X, l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] -(104) Exchange +(107) Exchange Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(105) Scan parquet +(108) Sort +Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] +Arguments: [l_suppkey#X ASC NULLS FIRST], false, 0 + +(109) Scan parquet Output [2]: [s_suppkey#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct -(106) Filter +(110) Filter Input [2]: [s_suppkey#X, s_nationkey#X] Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) -(107) Exchange +(111) Exchange Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(108) ShuffledHashJoin +(112) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(113) SortMergeJoin Left keys [1]: [l_suppkey#X] Right keys [1]: [s_suppkey#X] Join condition: None -(109) Project +(114) ShuffledHashJoinExecTemp +Input [8]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_suppkey#X, s_nationkey#X] +Arguments: BuildRight + +(115) Project Output [7]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X] Input [8]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_suppkey#X, s_nationkey#X] -(110) Exchange +(116) Exchange Input [7]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X] Arguments: hashpartitioning(l_suppkey#X, l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(111) Scan parquet +(117) Sort +Input [7]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X] +Arguments: [l_suppkey#X ASC NULLS FIRST, l_partkey#X ASC NULLS FIRST], false, 0 + +(118) Scan parquet Output [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(ps_suppkey), IsNotNull(ps_partkey)] ReadSchema: struct -(112) Filter +(119) Filter Input [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] Condition : (isnotnull(ps_suppkey#X) AND isnotnull(ps_partkey#X)) -(113) Exchange +(120) Exchange Input [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] Arguments: hashpartitioning(ps_suppkey#X, ps_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(114) ShuffledHashJoin +(121) Sort +Input [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] +Arguments: [ps_suppkey#X ASC NULLS FIRST, ps_partkey#X ASC NULLS FIRST], false, 0 + +(122) SortMergeJoin Left keys [2]: [l_suppkey#X, l_partkey#X] Right keys [2]: [ps_suppkey#X, ps_partkey#X] Join condition: None -(115) Project +(123) ShuffledHashJoinExecTemp +Input [10]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_partkey#X, ps_suppkey#X, ps_supplycost#X] +Arguments: BuildRight + +(124) Project Output [6]: [l_orderkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X] Input [10]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_partkey#X, ps_suppkey#X, ps_supplycost#X] -(116) Exchange +(125) Exchange Input [6]: [l_orderkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(117) Scan parquet +(126) Sort +Input [6]: [l_orderkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(127) Scan parquet Output [2]: [o_orderkey#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderkey)] ReadSchema: struct -(118) Filter +(128) Filter Input [2]: [o_orderkey#X, o_orderdate#X] Condition : isnotnull(o_orderkey#X) -(119) Exchange +(129) Exchange Input [2]: [o_orderkey#X, o_orderdate#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(120) ShuffledHashJoin +(130) Sort +Input [2]: [o_orderkey#X, o_orderdate#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(131) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [o_orderkey#X] Join condition: None -(121) Project +(132) ShuffledHashJoinExecTemp +Input [8]: [l_orderkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderkey#X, o_orderdate#X] +Arguments: BuildRight + +(133) Project Output [6]: [l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderdate#X] Input [8]: [l_orderkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderkey#X, o_orderdate#X] -(122) Exchange +(134) Exchange Input [6]: [l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderdate#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(123) Scan parquet +(135) Sort +Input [6]: [l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderdate#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(136) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey)] ReadSchema: struct -(124) Filter +(137) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : isnotnull(n_nationkey#X) -(125) Exchange +(138) Exchange Input [2]: [n_nationkey#X, n_name#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(126) ShuffledHashJoin +(139) Sort +Input [2]: [n_nationkey#X, n_name#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(140) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(127) Project +(141) ShuffledHashJoinExecTemp +Input [8]: [l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderdate#X, n_nationkey#X, n_name#X] +Arguments: BuildRight + +(142) Project Output [3]: [n_name#X AS nation#X, year(o_orderdate#X) AS o_year#X, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true) as decimal(27,4))) - promote_precision(cast(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(l_quantity#X)), DecimalType(25,4), true) as decimal(27,4)))), DecimalType(27,4), true) AS amount#X] Input [8]: [l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderdate#X, n_nationkey#X, n_name#X] -(128) HashAggregate +(143) HashAggregate Input [3]: [nation#X, o_year#X, amount#X] Keys [2]: [nation#X, o_year#X] Functions [1]: [partial_sum(amount#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [4]: [nation#X, o_year#X, sum#X, isEmpty#X] -(129) Exchange +(144) Exchange Input [4]: [nation#X, o_year#X, sum#X, isEmpty#X] Arguments: hashpartitioning(nation#X, o_year#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(130) HashAggregate +(145) HashAggregate Input [4]: [nation#X, o_year#X, sum#X, isEmpty#X] Keys [2]: [nation#X, o_year#X] Functions [1]: [sum(amount#X)] Aggregate Attributes [1]: [sum(amount#X)#X] Results [3]: [nation#X, o_year#X, sum(amount#X)#X AS sum_profit#X] -(131) Exchange +(146) Exchange Input [3]: [nation#X, o_year#X, sum_profit#X] Arguments: rangepartitioning(nation#X ASC NULLS FIRST, o_year#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(132) Sort +(147) Sort Input [3]: [nation#X, o_year#X, sum_profit#X] Arguments: [nation#X ASC NULLS FIRST, o_year#X DESC NULLS LAST], true, 0 -(133) AdaptiveSparkPlan +(148) AdaptiveSparkPlan Output [3]: [nation#X, o_year#X, sum_profit#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/10.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/10.txt index 680971de4c1d..368250980c75 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/10.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/10.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (87) +AdaptiveSparkPlan (96) +- == Final Plan == VeloxColumnarToRowExec (60) +- TakeOrderedAndProjectExecTransformer (59) @@ -47,32 +47,41 @@ AdaptiveSparkPlan (87) +- ^ FilterExecTransformer (40) +- ^ Scan parquet (39) +- == Initial Plan == - TakeOrderedAndProject (86) - +- HashAggregate (85) - +- Exchange (84) - +- HashAggregate (83) - +- Project (82) - +- ShuffledHashJoin Inner BuildRight (81) - :- Exchange (77) - : +- Project (76) - : +- ShuffledHashJoin Inner BuildRight (75) - : :- Exchange (70) - : : +- Project (69) - : : +- ShuffledHashJoin Inner BuildRight (68) - : : :- Exchange (63) - : : : +- Filter (62) - : : : +- Scan parquet (61) - : : +- Exchange (67) - : : +- Project (66) - : : +- Filter (65) - : : +- Scan parquet (64) - : +- Exchange (74) - : +- Project (73) - : +- Filter (72) - : +- Scan parquet (71) - +- Exchange (80) - +- Filter (79) - +- Scan parquet (78) + TakeOrderedAndProject (95) + +- HashAggregate (94) + +- Exchange (93) + +- HashAggregate (92) + +- Project (91) + +- ShuffledHashJoinExecTemp (90) + +- SortMergeJoin Inner (89) + :- Sort (84) + : +- Exchange (83) + : +- Project (82) + : +- ShuffledHashJoinExecTemp (81) + : +- SortMergeJoin Inner (80) + : :- Sort (74) + : : +- Exchange (73) + : : +- Project (72) + : : +- ShuffledHashJoinExecTemp (71) + : : +- SortMergeJoin Inner (70) + : : :- Sort (64) + : : : +- Exchange (63) + : : : +- Filter (62) + : : : +- Scan parquet (61) + : : +- Sort (69) + : : +- Exchange (68) + : : +- Project (67) + : : +- Filter (66) + : : +- Scan parquet (65) + : +- Sort (79) + : +- Exchange (78) + : +- Project (77) + : +- Filter (76) + : +- Scan parquet (75) + +- Sort (88) + +- Exchange (87) + +- Filter (86) + +- Scan parquet (85) (1) Scan parquet @@ -336,116 +345,152 @@ Condition : (isnotnull(c_custkey#X) AND isnotnull(c_nationkey#X)) Input [7]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(64) Scan parquet +(64) Sort +Input [7]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(65) Scan parquet Output [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderdate), GreaterThanOrEqual(o_orderdate,1993-10-01), LessThan(o_orderdate,1994-01-01), IsNotNull(o_custkey), IsNotNull(o_orderkey)] ReadSchema: struct -(65) Filter +(66) Filter Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Condition : ((((isnotnull(o_orderdate#X) AND (o_orderdate#X >= 1993-10-01)) AND (o_orderdate#X < 1994-01-01)) AND isnotnull(o_custkey#X)) AND isnotnull(o_orderkey#X)) -(66) Project +(67) Project Output [2]: [o_orderkey#X, o_custkey#X] Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] -(67) Exchange +(68) Exchange Input [2]: [o_orderkey#X, o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(68) ShuffledHashJoin +(69) Sort +Input [2]: [o_orderkey#X, o_custkey#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(70) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join condition: None -(69) Project +(71) ShuffledHashJoinExecTemp +Input [9]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X, o_custkey#X] +Arguments: BuildRight + +(72) Project Output [8]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X] Input [9]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X, o_custkey#X] -(70) Exchange +(73) Exchange Input [8]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(71) Scan parquet +(74) Sort +Input [8]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(75) Scan parquet Output [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_returnflag#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_returnflag), EqualTo(l_returnflag,R), IsNotNull(l_orderkey)] ReadSchema: struct -(72) Filter +(76) Filter Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_returnflag#X] Condition : ((isnotnull(l_returnflag#X) AND (l_returnflag#X = R)) AND isnotnull(l_orderkey#X)) -(73) Project +(77) Project Output [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_returnflag#X] -(74) Exchange +(78) Exchange Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(75) ShuffledHashJoin +(79) Sort +Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(80) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(76) Project +(81) ShuffledHashJoinExecTemp +Input [11]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X] +Arguments: BuildRight + +(82) Project Output [9]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X] Input [11]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X] -(77) Exchange +(83) Exchange Input [9]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(c_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(78) Scan parquet +(84) Sort +Input [9]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X] +Arguments: [c_nationkey#X ASC NULLS FIRST], false, 0 + +(85) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey)] ReadSchema: struct -(79) Filter +(86) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : isnotnull(n_nationkey#X) -(80) Exchange +(87) Exchange Input [2]: [n_nationkey#X, n_name#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(81) ShuffledHashJoin +(88) Sort +Input [2]: [n_nationkey#X, n_name#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(89) SortMergeJoin Left keys [1]: [c_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(82) Project +(90) ShuffledHashJoinExecTemp +Input [11]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X, n_nationkey#X, n_name#X] +Arguments: BuildRight + +(91) Project Output [9]: [c_custkey#X, c_name#X, c_address#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X, n_name#X] Input [11]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X, n_nationkey#X, n_name#X] -(83) HashAggregate +(92) HashAggregate Input [9]: [c_custkey#X, c_name#X, c_address#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X, n_name#X] Keys [7]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [9]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, sum#X, isEmpty#X] -(84) Exchange +(93) Exchange Input [9]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, sum#X, isEmpty#X] Arguments: hashpartitioning(c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(85) HashAggregate +(94) HashAggregate Input [9]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, sum#X, isEmpty#X] Keys [7]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] Results [8]: [c_custkey#X, c_name#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X AS revenue#X, c_acctbal#X, n_name#X, c_address#X, c_phone#X, c_comment#X] -(86) TakeOrderedAndProject +(95) TakeOrderedAndProject Input [8]: [c_custkey#X, c_name#X, revenue#X, c_acctbal#X, n_name#X, c_address#X, c_phone#X, c_comment#X] Arguments: X, [revenue#X DESC NULLS LAST], [c_custkey#X, c_name#X, revenue#X, c_acctbal#X, n_name#X, c_address#X, c_phone#X, c_comment#X] -(87) AdaptiveSparkPlan +(96) AdaptiveSparkPlan Output [8]: [c_custkey#X, c_name#X, revenue#X, c_acctbal#X, n_name#X, c_address#X, c_phone#X, c_comment#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/11.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/11.txt index b964ec1ed8d4..6c2335488a12 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/11.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/11.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (72) +AdaptiveSparkPlan (78) +- == Final Plan == VeloxColumnarToRowExec (50) +- ^ SortExecTransformer (48) @@ -39,27 +39,33 @@ AdaptiveSparkPlan (72) +- ^ FilterExecTransformer (25) +- ^ Scan parquet (24) +- == Initial Plan == - Sort (71) - +- Exchange (70) - +- Filter (69) - +- HashAggregate (68) - +- Exchange (67) - +- HashAggregate (66) - +- Project (65) - +- ShuffledHashJoin Inner BuildRight (64) - :- Exchange (59) - : +- Project (58) - : +- ShuffledHashJoin Inner BuildRight (57) - : :- Exchange (53) - : : +- Filter (52) - : : +- Scan parquet (51) - : +- Exchange (56) - : +- Filter (55) - : +- Scan parquet (54) - +- Exchange (63) - +- Project (62) - +- Filter (61) - +- Scan parquet (60) + Sort (77) + +- Exchange (76) + +- Filter (75) + +- HashAggregate (74) + +- Exchange (73) + +- HashAggregate (72) + +- Project (71) + +- ShuffledHashJoinExecTemp (70) + +- SortMergeJoin Inner (69) + :- Sort (63) + : +- Exchange (62) + : +- Project (61) + : +- ShuffledHashJoinExecTemp (60) + : +- SortMergeJoin Inner (59) + : :- Sort (54) + : : +- Exchange (53) + : : +- Filter (52) + : : +- Scan parquet (51) + : +- Sort (58) + : +- Exchange (57) + : +- Filter (56) + : +- Scan parquet (55) + +- Sort (68) + +- Exchange (67) + +- Project (66) + +- Filter (65) + +- Scan parquet (64) (1) Scan parquet @@ -281,349 +287,403 @@ Condition : isnotnull(ps_suppkey#X) Input [4]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(54) Scan parquet +(54) Sort +Input [4]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] +Arguments: [ps_suppkey#X ASC NULLS FIRST], false, 0 + +(55) Scan parquet Output [2]: [s_suppkey#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct -(55) Filter +(56) Filter Input [2]: [s_suppkey#X, s_nationkey#X] Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) -(56) Exchange +(57) Exchange Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(57) ShuffledHashJoin +(58) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(59) SortMergeJoin Left keys [1]: [ps_suppkey#X] Right keys [1]: [s_suppkey#X] Join condition: None -(58) Project +(60) ShuffledHashJoinExecTemp +Input [6]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X, s_suppkey#X, s_nationkey#X] +Arguments: BuildRight + +(61) Project Output [4]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] Input [6]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X, s_suppkey#X, s_nationkey#X] -(59) Exchange +(62) Exchange Input [4]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(60) Scan parquet +(63) Sort +Input [4]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(64) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,GERMANY), IsNotNull(n_nationkey)] ReadSchema: struct -(61) Filter +(65) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = GERMANY)) AND isnotnull(n_nationkey#X)) -(62) Project +(66) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(63) Exchange +(67) Exchange Input [1]: [n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(64) ShuffledHashJoin +(68) Sort +Input [1]: [n_nationkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(69) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(65) Project +(70) ShuffledHashJoinExecTemp +Input [5]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X, n_nationkey#X] +Arguments: BuildRight + +(71) Project Output [3]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X] Input [5]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X, n_nationkey#X] -(66) HashAggregate +(72) HashAggregate Input [3]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X] Keys [1]: [ps_partkey#X] Functions [1]: [partial_sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [ps_partkey#X, sum#X, isEmpty#X] -(67) Exchange +(73) Exchange Input [3]: [ps_partkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(ps_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(68) HashAggregate +(74) HashAggregate Input [3]: [ps_partkey#X, sum#X, isEmpty#X] Keys [1]: [ps_partkey#X] Functions [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))#X] Results [2]: [ps_partkey#X, sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))#X AS value#X] -(69) Filter +(75) Filter Input [2]: [ps_partkey#X, value#X] Condition : (isnotnull(value#X) AND (cast(value#X as decimal(38,6)) > Subquery subquery#X, [id=#X])) -(70) Exchange +(76) Exchange Input [2]: [ps_partkey#X, value#X] Arguments: rangepartitioning(value#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(71) Sort +(77) Sort Input [2]: [ps_partkey#X, value#X] Arguments: [value#X DESC NULLS LAST], true, 0 -(72) AdaptiveSparkPlan +(78) AdaptiveSparkPlan Output [2]: [ps_partkey#X, value#X] Arguments: isFinalPlan=true ===== Subqueries ===== Subquery:1 Hosting operator id = 42 Hosting Expression = Subquery subquery#X, [id=#X] -AdaptiveSparkPlan (120) +AdaptiveSparkPlan (132) +- == Final Plan == - VeloxColumnarToRowExec (102) - +- ^ ProjectExecTransformer (100) - +- ^ RegularHashAggregateExecTransformer (99) - +- ^ RegularHashAggregateExecTransformer (98) - +- ^ ProjectExecTransformer (97) - +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (96) - :- ^ InputIteratorTransformer (91) - : +- ShuffleQueryStage (89), Statistics(X) - : +- ColumnarExchange (88) - : +- ^ ProjectExecTransformer (86) - : +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (85) - : :- ^ InputIteratorTransformer (80) - : : +- ShuffleQueryStage (78), Statistics(X) - : : +- ColumnarExchange (77) - : : +- ^ ProjectExecTransformer (75) - : : +- ^ FilterExecTransformer (74) - : : +- ^ Scan parquet (73) - : +- ^ InputIteratorTransformer (84) - : +- ShuffleQueryStage (82), Statistics(X) - : +- ReusedExchange (81) - +- ^ InputIteratorTransformer (95) - +- ShuffleQueryStage (93), Statistics(X) - +- ReusedExchange (92) + VeloxColumnarToRowExec (108) + +- ^ ProjectExecTransformer (106) + +- ^ RegularHashAggregateExecTransformer (105) + +- ^ RegularHashAggregateExecTransformer (104) + +- ^ ProjectExecTransformer (103) + +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (102) + :- ^ InputIteratorTransformer (97) + : +- ShuffleQueryStage (95), Statistics(X) + : +- ColumnarExchange (94) + : +- ^ ProjectExecTransformer (92) + : +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (91) + : :- ^ InputIteratorTransformer (86) + : : +- ShuffleQueryStage (84), Statistics(X) + : : +- ColumnarExchange (83) + : : +- ^ ProjectExecTransformer (81) + : : +- ^ FilterExecTransformer (80) + : : +- ^ Scan parquet (79) + : +- ^ InputIteratorTransformer (90) + : +- ShuffleQueryStage (88), Statistics(X) + : +- ReusedExchange (87) + +- ^ InputIteratorTransformer (101) + +- ShuffleQueryStage (99), Statistics(X) + +- ReusedExchange (98) +- == Initial Plan == - HashAggregate (119) - +- HashAggregate (118) - +- Project (117) - +- ShuffledHashJoin Inner BuildRight (116) - :- Exchange (111) - : +- Project (110) - : +- ShuffledHashJoin Inner BuildRight (109) - : :- Exchange (105) - : : +- Filter (104) - : : +- Scan parquet (103) - : +- Exchange (108) - : +- Filter (107) - : +- Scan parquet (106) - +- Exchange (115) - +- Project (114) - +- Filter (113) - +- Scan parquet (112) - - -(73) Scan parquet + HashAggregate (131) + +- HashAggregate (130) + +- Project (129) + +- ShuffledHashJoinExecTemp (128) + +- SortMergeJoin Inner (127) + :- Sort (121) + : +- Exchange (120) + : +- Project (119) + : +- ShuffledHashJoinExecTemp (118) + : +- SortMergeJoin Inner (117) + : :- Sort (112) + : : +- Exchange (111) + : : +- Filter (110) + : : +- Scan parquet (109) + : +- Sort (116) + : +- Exchange (115) + : +- Filter (114) + : +- Scan parquet (113) + +- Sort (126) + +- Exchange (125) + +- Project (124) + +- Filter (123) + +- Scan parquet (122) + + +(79) Scan parquet Output [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(ps_suppkey)] ReadSchema: struct -(74) FilterExecTransformer +(80) FilterExecTransformer Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: isnotnull(ps_suppkey#X) -(75) ProjectExecTransformer +(81) ProjectExecTransformer Output [4]: [hash(ps_suppkey#X, 42) AS hash_partition_key#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] -(76) WholeStageCodegenTransformer (X) +(82) WholeStageCodegenTransformer (X) Input [4]: [hash_partition_key#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: false -(77) ColumnarExchange +(83) ColumnarExchange Input [4]: [hash_partition_key#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [ps_suppkey#X, ps_availqty#X, ps_supplycost#X], [plan_id=X], [id=#X] -(78) ShuffleQueryStage +(84) ShuffleQueryStage Output [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: X -(79) InputAdapter +(85) InputAdapter Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] -(80) InputIteratorTransformer +(86) InputIteratorTransformer Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] -(81) ReusedExchange [Reuses operator id: 13] +(87) ReusedExchange [Reuses operator id: 13] Output [2]: [s_suppkey#X, s_nationkey#X] -(82) ShuffleQueryStage +(88) ShuffleQueryStage Output [2]: [s_suppkey#X, s_nationkey#X] Arguments: X -(83) InputAdapter +(89) InputAdapter Input [2]: [s_suppkey#X, s_nationkey#X] -(84) InputIteratorTransformer +(90) InputIteratorTransformer Input [2]: [s_suppkey#X, s_nationkey#X] -(85) ShuffledHashJoinExecTransformer +(91) ShuffledHashJoinExecTransformer Left keys [1]: [ps_suppkey#X] Right keys [1]: [s_suppkey#X] Join condition: None -(86) ProjectExecTransformer +(92) ProjectExecTransformer Output [4]: [hash(s_nationkey#X, 42) AS hash_partition_key#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] Input [5]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X, s_suppkey#X, s_nationkey#X] -(87) WholeStageCodegenTransformer (X) +(93) WholeStageCodegenTransformer (X) Input [4]: [hash_partition_key#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] Arguments: false -(88) ColumnarExchange +(94) ColumnarExchange Input [4]: [hash_partition_key#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [ps_availqty#X, ps_supplycost#X, s_nationkey#X], [plan_id=X], [id=#X] -(89) ShuffleQueryStage +(95) ShuffleQueryStage Output [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] Arguments: X -(90) InputAdapter +(96) InputAdapter Input [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] -(91) InputIteratorTransformer +(97) InputIteratorTransformer Input [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] -(92) ReusedExchange [Reuses operator id: 28] +(98) ReusedExchange [Reuses operator id: 28] Output [1]: [n_nationkey#X] -(93) ShuffleQueryStage +(99) ShuffleQueryStage Output [1]: [n_nationkey#X] Arguments: X -(94) InputAdapter +(100) InputAdapter Input [1]: [n_nationkey#X] -(95) InputIteratorTransformer +(101) InputIteratorTransformer Input [1]: [n_nationkey#X] -(96) ShuffledHashJoinExecTransformer +(102) ShuffledHashJoinExecTransformer Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(97) ProjectExecTransformer +(103) ProjectExecTransformer Output [3]: [ps_availqty#X, ps_supplycost#X, CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)) AS _pre_X#X] Input [4]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X, n_nationkey#X] -(98) RegularHashAggregateExecTransformer +(104) RegularHashAggregateExecTransformer Input [3]: [ps_availqty#X, ps_supplycost#X, _pre_X#X] Keys: [] Functions [1]: [partial_sum(_pre_X#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [2]: [sum#X, isEmpty#X] -(99) RegularHashAggregateExecTransformer +(105) RegularHashAggregateExecTransformer Input [2]: [sum#X, isEmpty#X] Keys: [] Functions [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))#X] Results [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))#X] -(100) ProjectExecTransformer +(106) ProjectExecTransformer Output [1]: [CheckOverflow((promote_precision(cast(sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))#X as decimal(38,10))) * 0.0001000000), DecimalType(38,6)) AS (sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] Input [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))#X] -(101) WholeStageCodegenTransformer (X) +(107) WholeStageCodegenTransformer (X) Input [1]: [(sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] Arguments: false -(102) VeloxColumnarToRowExec +(108) VeloxColumnarToRowExec Input [1]: [(sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] -(103) Scan parquet +(109) Scan parquet Output [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(ps_suppkey)] ReadSchema: struct -(104) Filter +(110) Filter Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Condition : isnotnull(ps_suppkey#X) -(105) Exchange +(111) Exchange Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(106) Scan parquet +(112) Sort +Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] +Arguments: [ps_suppkey#X ASC NULLS FIRST], false, 0 + +(113) Scan parquet Output [2]: [s_suppkey#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct -(107) Filter +(114) Filter Input [2]: [s_suppkey#X, s_nationkey#X] Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) -(108) Exchange +(115) Exchange Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(109) ShuffledHashJoin +(116) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(117) SortMergeJoin Left keys [1]: [ps_suppkey#X] Right keys [1]: [s_suppkey#X] Join condition: None -(110) Project +(118) ShuffledHashJoinExecTemp +Input [5]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X, s_suppkey#X, s_nationkey#X] +Arguments: BuildRight + +(119) Project Output [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] Input [5]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X, s_suppkey#X, s_nationkey#X] -(111) Exchange +(120) Exchange Input [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(112) Scan parquet +(121) Sort +Input [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(122) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,GERMANY), IsNotNull(n_nationkey)] ReadSchema: struct -(113) Filter +(123) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = GERMANY)) AND isnotnull(n_nationkey#X)) -(114) Project +(124) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(115) Exchange +(125) Exchange Input [1]: [n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(116) ShuffledHashJoin +(126) Sort +Input [1]: [n_nationkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(127) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(117) Project +(128) ShuffledHashJoinExecTemp +Input [4]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X, n_nationkey#X] +Arguments: BuildRight + +(129) Project Output [2]: [ps_availqty#X, ps_supplycost#X] Input [4]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X, n_nationkey#X] -(118) HashAggregate +(130) HashAggregate Input [2]: [ps_availqty#X, ps_supplycost#X] Keys: [] Functions [1]: [partial_sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [2]: [sum#X, isEmpty#X] -(119) HashAggregate +(131) HashAggregate Input [2]: [sum#X, isEmpty#X] Keys: [] Functions [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))#X] Results [1]: [CheckOverflow((promote_precision(cast(sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))#X as decimal(38,10))) * 0.0001000000), DecimalType(38,6)) AS (sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] -(120) AdaptiveSparkPlan +(132) AdaptiveSparkPlan Output [1]: [(sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/12.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/12.txt index fabde3ecd687..d82736a57532 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/12.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/12.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (49) +AdaptiveSparkPlan (52) +- == Final Plan == VeloxColumnarToRowExec (34) +- ^ SortExecTransformer (32) @@ -27,20 +27,23 @@ AdaptiveSparkPlan (49) +- ^ FilterExecTransformer (10) +- ^ Scan parquet (9) +- == Initial Plan == - Sort (48) - +- Exchange (47) - +- HashAggregate (46) - +- Exchange (45) - +- HashAggregate (44) - +- Project (43) - +- ShuffledHashJoin Inner BuildLeft (42) - :- Exchange (37) - : +- Filter (36) - : +- Scan parquet (35) - +- Exchange (41) - +- Project (40) - +- Filter (39) - +- Scan parquet (38) + Sort (51) + +- Exchange (50) + +- HashAggregate (49) + +- Exchange (48) + +- HashAggregate (47) + +- Project (46) + +- ShuffledHashJoinExecTemp (45) + +- SortMergeJoin Inner (44) + :- Sort (38) + : +- Exchange (37) + : +- Filter (36) + : +- Scan parquet (35) + +- Sort (43) + +- Exchange (42) + +- Project (41) + +- Filter (40) + +- Scan parquet (39) (1) Scan parquet @@ -198,60 +201,72 @@ Condition : isnotnull(o_orderkey#X) Input [2]: [o_orderkey#X, o_orderpriority#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(38) Scan parquet +(38) Sort +Input [2]: [o_orderkey#X, o_orderpriority#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(39) Scan parquet Output [5]: [l_orderkey#X, l_shipdate#X, l_commitdate#X, l_receiptdate#X, l_shipmode#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_commitdate), IsNotNull(l_receiptdate), IsNotNull(l_shipdate), In(l_shipmode, [MAIL,SHIP]), GreaterThanOrEqual(l_receiptdate,1994-01-01), LessThan(l_receiptdate,1995-01-01), IsNotNull(l_orderkey)] ReadSchema: struct -(39) Filter +(40) Filter Input [5]: [l_orderkey#X, l_shipdate#X, l_commitdate#X, l_receiptdate#X, l_shipmode#X] Condition : ((((((((isnotnull(l_commitdate#X) AND isnotnull(l_receiptdate#X)) AND isnotnull(l_shipdate#X)) AND l_shipmode#X IN (MAIL,SHIP)) AND (l_commitdate#X < l_receiptdate#X)) AND (l_shipdate#X < l_commitdate#X)) AND (l_receiptdate#X >= 1994-01-01)) AND (l_receiptdate#X < 1995-01-01)) AND isnotnull(l_orderkey#X)) -(40) Project +(41) Project Output [2]: [l_orderkey#X, l_shipmode#X] Input [5]: [l_orderkey#X, l_shipdate#X, l_commitdate#X, l_receiptdate#X, l_shipmode#X] -(41) Exchange +(42) Exchange Input [2]: [l_orderkey#X, l_shipmode#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(42) ShuffledHashJoin +(43) Sort +Input [2]: [l_orderkey#X, l_shipmode#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(44) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(43) Project +(45) ShuffledHashJoinExecTemp +Input [4]: [o_orderkey#X, o_orderpriority#X, l_orderkey#X, l_shipmode#X] +Arguments: BuildLeft + +(46) Project Output [2]: [o_orderpriority#X, l_shipmode#X] Input [4]: [o_orderkey#X, o_orderpriority#X, l_orderkey#X, l_shipmode#X] -(44) HashAggregate +(47) HashAggregate Input [2]: [o_orderpriority#X, l_shipmode#X] Keys [1]: [l_shipmode#X] Functions [2]: [partial_sum(CASE WHEN ((o_orderpriority#X = 1-URGENT) OR (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (NOT (o_orderpriority#X = 1-URGENT) AND NOT (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)] Aggregate Attributes [2]: [sum#X, sum#X] Results [3]: [l_shipmode#X, sum#X, sum#X] -(45) Exchange +(48) Exchange Input [3]: [l_shipmode#X, sum#X, sum#X] Arguments: hashpartitioning(l_shipmode#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(46) HashAggregate +(49) HashAggregate Input [3]: [l_shipmode#X, sum#X, sum#X] Keys [1]: [l_shipmode#X] Functions [2]: [sum(CASE WHEN ((o_orderpriority#X = 1-URGENT) OR (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END), sum(CASE WHEN (NOT (o_orderpriority#X = 1-URGENT) AND NOT (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)] Aggregate Attributes [2]: [sum(CASE WHEN ((o_orderpriority#X = 1-URGENT) OR (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)#X, sum(CASE WHEN (NOT (o_orderpriority#X = 1-URGENT) AND NOT (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)#X] Results [3]: [l_shipmode#X, sum(CASE WHEN ((o_orderpriority#X = 1-URGENT) OR (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)#X AS high_line_count#X, sum(CASE WHEN (NOT (o_orderpriority#X = 1-URGENT) AND NOT (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)#X AS low_line_count#X] -(47) Exchange +(50) Exchange Input [3]: [l_shipmode#X, high_line_count#X, low_line_count#X] Arguments: rangepartitioning(l_shipmode#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(48) Sort +(51) Sort Input [3]: [l_shipmode#X, high_line_count#X, low_line_count#X] Arguments: [l_shipmode#X ASC NULLS FIRST], true, 0 -(49) AdaptiveSparkPlan +(52) AdaptiveSparkPlan Output [3]: [l_shipmode#X, high_line_count#X, low_line_count#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/13.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/13.txt index 6ca6e75ac545..3ea75a8141dc 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/13.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/13.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (52) +AdaptiveSparkPlan (55) +- == Final Plan == VeloxColumnarToRowExec (36) +- ^ SortExecTransformer (34) @@ -29,21 +29,24 @@ AdaptiveSparkPlan (52) +- ^ FilterExecTransformer (9) +- ^ Scan parquet (8) +- == Initial Plan == - Sort (51) - +- Exchange (50) - +- HashAggregate (49) - +- Exchange (48) - +- HashAggregate (47) - +- HashAggregate (46) - +- HashAggregate (45) - +- Project (44) - +- ShuffledHashJoin LeftOuter BuildRight (43) - :- Exchange (38) - : +- Scan parquet (37) - +- Exchange (42) - +- Project (41) - +- Filter (40) - +- Scan parquet (39) + Sort (54) + +- Exchange (53) + +- HashAggregate (52) + +- Exchange (51) + +- HashAggregate (50) + +- HashAggregate (49) + +- HashAggregate (48) + +- Project (47) + +- ShuffledHashJoinExecTemp (46) + +- SortMergeJoin LeftOuter (45) + :- Sort (39) + : +- Exchange (38) + : +- Scan parquet (37) + +- Sort (44) + +- Exchange (43) + +- Project (42) + +- Filter (41) + +- Scan parquet (40) (1) Scan parquet @@ -209,74 +212,86 @@ ReadSchema: struct Input [1]: [c_custkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(39) Scan parquet +(39) Sort +Input [1]: [c_custkey#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(40) Scan parquet Output [3]: [o_orderkey#X, o_custkey#X, o_comment#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_comment), IsNotNull(o_custkey)] ReadSchema: struct -(40) Filter +(41) Filter Input [3]: [o_orderkey#X, o_custkey#X, o_comment#X] Condition : ((isnotnull(o_comment#X) AND NOT o_comment#X LIKE %special%requests%) AND isnotnull(o_custkey#X)) -(41) Project +(42) Project Output [2]: [o_orderkey#X, o_custkey#X] Input [3]: [o_orderkey#X, o_custkey#X, o_comment#X] -(42) Exchange +(43) Exchange Input [2]: [o_orderkey#X, o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(43) ShuffledHashJoin +(44) Sort +Input [2]: [o_orderkey#X, o_custkey#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(45) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join condition: None -(44) Project +(46) ShuffledHashJoinExecTemp +Input [3]: [c_custkey#X, o_orderkey#X, o_custkey#X] +Arguments: BuildLeft + +(47) Project Output [2]: [c_custkey#X, o_orderkey#X] Input [3]: [c_custkey#X, o_orderkey#X, o_custkey#X] -(45) HashAggregate +(48) HashAggregate Input [2]: [c_custkey#X, o_orderkey#X] Keys [1]: [c_custkey#X] Functions [1]: [partial_count(o_orderkey#X)] Aggregate Attributes [1]: [count#X] Results [2]: [c_custkey#X, count#X] -(46) HashAggregate +(49) HashAggregate Input [2]: [c_custkey#X, count#X] Keys [1]: [c_custkey#X] Functions [1]: [count(o_orderkey#X)] Aggregate Attributes [1]: [count(o_orderkey#X)#X] Results [1]: [count(o_orderkey#X)#X AS c_count#X] -(47) HashAggregate +(50) HashAggregate Input [1]: [c_count#X] Keys [1]: [c_count#X] Functions [1]: [partial_count(1)] Aggregate Attributes [1]: [count#X] Results [2]: [c_count#X, count#X] -(48) Exchange +(51) Exchange Input [2]: [c_count#X, count#X] Arguments: hashpartitioning(c_count#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(49) HashAggregate +(52) HashAggregate Input [2]: [c_count#X, count#X] Keys [1]: [c_count#X] Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#X] Results [2]: [c_count#X, count(1)#X AS custdist#X] -(50) Exchange +(53) Exchange Input [2]: [c_count#X, custdist#X] Arguments: rangepartitioning(custdist#X DESC NULLS LAST, c_count#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(51) Sort +(54) Sort Input [2]: [c_count#X, custdist#X] Arguments: [custdist#X DESC NULLS LAST, c_count#X DESC NULLS LAST], true, 0 -(52) AdaptiveSparkPlan +(55) AdaptiveSparkPlan Output [2]: [c_count#X, custdist#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/14.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/14.txt index c1a4a2fe884f..f091e13d3f00 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/14.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/14.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (35) +AdaptiveSparkPlan (38) +- == Final Plan == VeloxColumnarToRowExec (23) +- ^ ProjectExecTransformer (21) @@ -20,17 +20,20 @@ AdaptiveSparkPlan (35) +- ^ FilterExecTransformer (10) +- ^ Scan parquet (9) +- == Initial Plan == - HashAggregate (34) - +- HashAggregate (33) - +- Project (32) - +- ShuffledHashJoin Inner BuildRight (31) - :- Exchange (27) - : +- Project (26) - : +- Filter (25) - : +- Scan parquet (24) - +- Exchange (30) - +- Filter (29) - +- Scan parquet (28) + HashAggregate (37) + +- HashAggregate (36) + +- Project (35) + +- ShuffledHashJoinExecTemp (34) + +- SortMergeJoin Inner (33) + :- Sort (28) + : +- Exchange (27) + : +- Project (26) + : +- Filter (25) + : +- Scan parquet (24) + +- Sort (32) + +- Exchange (31) + +- Filter (30) + +- Scan parquet (29) (1) Scan parquet @@ -152,44 +155,56 @@ Input [4]: [l_partkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Input [3]: [l_partkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(28) Scan parquet +(28) Sort +Input [3]: [l_partkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(29) Scan parquet Output [2]: [p_partkey#X, p_type#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_partkey)] ReadSchema: struct -(29) Filter +(30) Filter Input [2]: [p_partkey#X, p_type#X] Condition : isnotnull(p_partkey#X) -(30) Exchange +(31) Exchange Input [2]: [p_partkey#X, p_type#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(31) ShuffledHashJoin +(32) Sort +Input [2]: [p_partkey#X, p_type#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(33) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(32) Project +(34) ShuffledHashJoinExecTemp +Input [5]: [l_partkey#X, l_extendedprice#X, l_discount#X, p_partkey#X, p_type#X] +Arguments: BuildRight + +(35) Project Output [3]: [l_extendedprice#X, l_discount#X, p_type#X] Input [5]: [l_partkey#X, l_extendedprice#X, l_discount#X, p_partkey#X, p_type#X] -(33) HashAggregate +(36) HashAggregate Input [3]: [l_extendedprice#X, l_discount#X, p_type#X] Keys: [] Functions [2]: [partial_sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)) ELSE 0.0000 END), partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] Results [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] -(34) HashAggregate +(37) HashAggregate Input [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] Keys: [] Functions [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)) ELSE 0.0000 END), sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)) ELSE 0.0000 END)#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] Results [1]: [CheckOverflow((promote_precision(CheckOverflow((100.0000 * promote_precision(sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)) ELSE 0.0000 END)#X)), DecimalType(38,6))) / promote_precision(cast(sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X as decimal(38,6)))), DecimalType(38,6)) AS promo_revenue#X] -(35) AdaptiveSparkPlan +(38) AdaptiveSparkPlan Output [1]: [promo_revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/15.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/15.txt index a45c61431782..3e24d0b84f2b 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/15.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/15.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (42) +AdaptiveSparkPlan (45) +- == Final Plan == VeloxColumnarToRowExec (27) +- AQEShuffleRead (26) @@ -24,20 +24,23 @@ AdaptiveSparkPlan (42) +- ^ FilterExecTransformer (10) +- ^ Scan parquet (9) +- == Initial Plan == - Sort (41) - +- Exchange (40) - +- Project (39) - +- ShuffledHashJoin Inner BuildLeft (38) - :- Exchange (30) - : +- Filter (29) - : +- Scan parquet (28) - +- Filter (37) - +- HashAggregate (36) - +- Exchange (35) - +- HashAggregate (34) - +- Project (33) - +- Filter (32) - +- Scan parquet (31) + Sort (44) + +- Exchange (43) + +- Project (42) + +- ShuffledHashJoinExecTemp (41) + +- SortMergeJoin Inner (40) + :- Sort (31) + : +- Exchange (30) + : +- Filter (29) + : +- Scan parquet (28) + +- Sort (39) + +- Filter (38) + +- HashAggregate (37) + +- Exchange (36) + +- HashAggregate (35) + +- Project (34) + +- Filter (33) + +- Scan parquet (32) (1) Scan parquet @@ -171,216 +174,228 @@ Condition : isnotnull(s_suppkey#X) Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(31) Scan parquet +(31) Sort +Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(32) Scan parquet Output [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1996-01-01), LessThan(l_shipdate,1996-04-01), IsNotNull(l_suppkey)] ReadSchema: struct -(32) Filter +(33) Filter Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : (((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1996-01-01)) AND (l_shipdate#X < 1996-04-01)) AND isnotnull(l_suppkey#X)) -(33) Project +(34) Project Output [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(34) HashAggregate +(35) HashAggregate Input [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] Keys [1]: [l_suppkey#X] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_suppkey#X, sum#X, isEmpty#X] -(35) Exchange +(36) Exchange Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(36) HashAggregate +(37) HashAggregate Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Keys [1]: [l_suppkey#X] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] Results [2]: [l_suppkey#X AS supplier_no#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X AS total_revenue#X] -(37) Filter +(38) Filter Input [2]: [supplier_no#X, total_revenue#X] Condition : (isnotnull(total_revenue#X) AND (total_revenue#X = Subquery subquery#X, [id=#X])) -(38) ShuffledHashJoin +(39) Sort +Input [2]: [supplier_no#X, total_revenue#X] +Arguments: [supplier_no#X ASC NULLS FIRST], false, 0 + +(40) SortMergeJoin Left keys [1]: [s_suppkey#X] Right keys [1]: [supplier_no#X] Join condition: None -(39) Project +(41) ShuffledHashJoinExecTemp +Input [6]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, supplier_no#X, total_revenue#X] +Arguments: BuildLeft + +(42) Project Output [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] Input [6]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, supplier_no#X, total_revenue#X] -(40) Exchange +(43) Exchange Input [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] Arguments: rangepartitioning(s_suppkey#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(41) Sort +(44) Sort Input [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] Arguments: [s_suppkey#X ASC NULLS FIRST], true, 0 -(42) AdaptiveSparkPlan +(45) AdaptiveSparkPlan Output [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] Arguments: isFinalPlan=true ===== Subqueries ===== Subquery:1 Hosting operator id = 20 Hosting Expression = Subquery subquery#X, [id=#X] -AdaptiveSparkPlan (67) +AdaptiveSparkPlan (70) +- == Final Plan == - VeloxColumnarToRowExec (58) - +- ^ RegularHashAggregateExecTransformer (56) - +- ^ RegularHashAggregateExecTransformer (55) - +- ^ ProjectExecTransformer (54) - +- ^ RegularHashAggregateExecTransformer (53) - +- ^ InputIteratorTransformer (52) - +- ShuffleQueryStage (50), Statistics(X) - +- ColumnarExchange (49) - +- ^ ProjectExecTransformer (47) - +- ^ FlushableHashAggregateExecTransformer (46) - +- ^ ProjectExecTransformer (45) - +- ^ FilterExecTransformer (44) - +- ^ Scan parquet (43) + VeloxColumnarToRowExec (61) + +- ^ RegularHashAggregateExecTransformer (59) + +- ^ RegularHashAggregateExecTransformer (58) + +- ^ ProjectExecTransformer (57) + +- ^ RegularHashAggregateExecTransformer (56) + +- ^ InputIteratorTransformer (55) + +- ShuffleQueryStage (53), Statistics(X) + +- ColumnarExchange (52) + +- ^ ProjectExecTransformer (50) + +- ^ FlushableHashAggregateExecTransformer (49) + +- ^ ProjectExecTransformer (48) + +- ^ FilterExecTransformer (47) + +- ^ Scan parquet (46) +- == Initial Plan == - HashAggregate (66) - +- HashAggregate (65) - +- HashAggregate (64) - +- Exchange (63) - +- HashAggregate (62) - +- Project (61) - +- Filter (60) - +- Scan parquet (59) + HashAggregate (69) + +- HashAggregate (68) + +- HashAggregate (67) + +- Exchange (66) + +- HashAggregate (65) + +- Project (64) + +- Filter (63) + +- Scan parquet (62) -(43) Scan parquet +(46) Scan parquet Output [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1996-01-01), LessThan(l_shipdate,1996-04-01)] ReadSchema: struct -(44) FilterExecTransformer +(47) FilterExecTransformer Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Arguments: ((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1996-01-01)) AND (l_shipdate#X < 1996-04-01)) -(45) ProjectExecTransformer +(48) ProjectExecTransformer Output [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)) AS _pre_X#X] Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(46) FlushableHashAggregateExecTransformer +(49) FlushableHashAggregateExecTransformer Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, _pre_X#X] Keys [1]: [l_suppkey#X] Functions [1]: [partial_sum(_pre_X#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_suppkey#X, sum#X, isEmpty#X] -(47) ProjectExecTransformer +(50) ProjectExecTransformer Output [4]: [hash(l_suppkey#X, 42) AS hash_partition_key#X, l_suppkey#X, sum#X, isEmpty#X] Input [3]: [l_suppkey#X, sum#X, isEmpty#X] -(48) WholeStageCodegenTransformer (X) +(51) WholeStageCodegenTransformer (X) Input [4]: [hash_partition_key#X, l_suppkey#X, sum#X, isEmpty#X] Arguments: false -(49) ColumnarExchange +(52) ColumnarExchange Input [4]: [hash_partition_key#X, l_suppkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [l_suppkey#X, sum#X, isEmpty#X], [plan_id=X], [id=#X] -(50) ShuffleQueryStage +(53) ShuffleQueryStage Output [3]: [l_suppkey#X, sum#X, isEmpty#X] Arguments: X -(51) InputAdapter +(54) InputAdapter Input [3]: [l_suppkey#X, sum#X, isEmpty#X] -(52) InputIteratorTransformer +(55) InputIteratorTransformer Input [3]: [l_suppkey#X, sum#X, isEmpty#X] -(53) RegularHashAggregateExecTransformer +(56) RegularHashAggregateExecTransformer Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Keys [1]: [l_suppkey#X] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] Results [2]: [l_suppkey#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] -(54) ProjectExecTransformer +(57) ProjectExecTransformer Output [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X AS total_revenue#X] Input [2]: [l_suppkey#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] -(55) RegularHashAggregateExecTransformer +(58) RegularHashAggregateExecTransformer Input [1]: [total_revenue#X] Keys: [] Functions [1]: [partial_max(total_revenue#X)] Aggregate Attributes [1]: [max#X] Results [1]: [max#X] -(56) RegularHashAggregateExecTransformer +(59) RegularHashAggregateExecTransformer Input [1]: [max#X] Keys: [] Functions [1]: [max(total_revenue#X)] Aggregate Attributes [1]: [max(total_revenue#X)#X] Results [1]: [max(total_revenue#X)#X AS max(total_revenue)#X] -(57) WholeStageCodegenTransformer (X) +(60) WholeStageCodegenTransformer (X) Input [1]: [max(total_revenue)#X] Arguments: false -(58) VeloxColumnarToRowExec +(61) VeloxColumnarToRowExec Input [1]: [max(total_revenue)#X] -(59) Scan parquet +(62) Scan parquet Output [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1996-01-01), LessThan(l_shipdate,1996-04-01)] ReadSchema: struct -(60) Filter +(63) Filter Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : ((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1996-01-01)) AND (l_shipdate#X < 1996-04-01)) -(61) Project +(64) Project Output [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(62) HashAggregate +(65) HashAggregate Input [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] Keys [1]: [l_suppkey#X] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_suppkey#X, sum#X, isEmpty#X] -(63) Exchange +(66) Exchange Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(64) HashAggregate +(67) HashAggregate Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Keys [1]: [l_suppkey#X] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] Results [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X AS total_revenue#X] -(65) HashAggregate +(68) HashAggregate Input [1]: [total_revenue#X] Keys: [] Functions [1]: [partial_max(total_revenue#X)] Aggregate Attributes [1]: [max#X] Results [1]: [max#X] -(66) HashAggregate +(69) HashAggregate Input [1]: [max#X] Keys: [] Functions [1]: [max(total_revenue#X)] Aggregate Attributes [1]: [max(total_revenue#X)#X] Results [1]: [max(total_revenue#X)#X AS max(total_revenue)#X] -(67) AdaptiveSparkPlan +(70) AdaptiveSparkPlan Output [1]: [max(total_revenue)#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/16.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/16.txt index 9a927b77805e..209832e1de1a 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/16.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/16.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (64) +AdaptiveSparkPlan (67) +- == Final Plan == VeloxColumnarToRowExec (42) +- ^ SortExecTransformer (40) @@ -33,27 +33,30 @@ AdaptiveSparkPlan (64) +- ^ FilterExecTransformer (10) +- ^ Scan parquet (9) +- == Initial Plan == - Sort (63) - +- Exchange (62) - +- HashAggregate (61) - +- Exchange (60) - +- HashAggregate (59) - +- HashAggregate (58) - +- Exchange (57) - +- HashAggregate (56) - +- Project (55) - +- ShuffledHashJoin Inner BuildRight (54) - :- Exchange (50) - : +- BroadcastHashJoin LeftAnti BuildRight (49) - : :- Filter (44) - : : +- Scan parquet (43) - : +- BroadcastExchange (48) - : +- Project (47) - : +- Filter (46) - : +- Scan parquet (45) - +- Exchange (53) - +- Filter (52) - +- Scan parquet (51) + Sort (66) + +- Exchange (65) + +- HashAggregate (64) + +- Exchange (63) + +- HashAggregate (62) + +- HashAggregate (61) + +- Exchange (60) + +- HashAggregate (59) + +- Project (58) + +- ShuffledHashJoinExecTemp (57) + +- SortMergeJoin Inner (56) + :- Sort (51) + : +- Exchange (50) + : +- BroadcastHashJoin LeftAnti BuildRight (49) + : :- Filter (44) + : : +- Scan parquet (43) + : +- BroadcastExchange (48) + : +- Project (47) + : +- Filter (46) + : +- Scan parquet (45) + +- Sort (55) + +- Exchange (54) + +- Filter (53) + +- Scan parquet (52) (1) Scan parquet @@ -271,74 +274,86 @@ Join condition: None Input [2]: [ps_partkey#X, ps_suppkey#X] Arguments: hashpartitioning(ps_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(51) Scan parquet +(51) Sort +Input [2]: [ps_partkey#X, ps_suppkey#X] +Arguments: [ps_partkey#X ASC NULLS FIRST], false, 0 + +(52) Scan parquet Output [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_brand), IsNotNull(p_type), Not(EqualTo(p_brand,Brand#X)), Not(StringStartsWith(p_type,MEDIUM POLISHED)), In(p_size, [14,19,23,3,36,45,49,9]), IsNotNull(p_partkey)] ReadSchema: struct -(52) Filter +(53) Filter Input [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] Condition : (((((isnotnull(p_brand#X) AND isnotnull(p_type#X)) AND NOT (p_brand#X = Brand#X)) AND NOT StartsWith(p_type#X, MEDIUM POLISHED)) AND p_size#X IN (49,14,23,45,19,3,36,9)) AND isnotnull(p_partkey#X)) -(53) Exchange +(54) Exchange Input [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(54) ShuffledHashJoin +(55) Sort +Input [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(56) SortMergeJoin Left keys [1]: [ps_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(55) Project +(57) ShuffledHashJoinExecTemp +Input [6]: [ps_partkey#X, ps_suppkey#X, p_partkey#X, p_brand#X, p_type#X, p_size#X] +Arguments: BuildRight + +(58) Project Output [4]: [ps_suppkey#X, p_brand#X, p_type#X, p_size#X] Input [6]: [ps_partkey#X, ps_suppkey#X, p_partkey#X, p_brand#X, p_type#X, p_size#X] -(56) HashAggregate +(59) HashAggregate Input [4]: [ps_suppkey#X, p_brand#X, p_type#X, p_size#X] Keys [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] Functions: [] Aggregate Attributes: [] Results [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] -(57) Exchange +(60) Exchange Input [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] Arguments: hashpartitioning(p_brand#X, p_type#X, p_size#X, ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(58) HashAggregate +(61) HashAggregate Input [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] Keys [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] Functions: [] Aggregate Attributes: [] Results [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] -(59) HashAggregate +(62) HashAggregate Input [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] Keys [3]: [p_brand#X, p_type#X, p_size#X] Functions [1]: [partial_count(distinct ps_suppkey#X)] Aggregate Attributes [1]: [count(ps_suppkey#X)#X] Results [4]: [p_brand#X, p_type#X, p_size#X, count#X] -(60) Exchange +(63) Exchange Input [4]: [p_brand#X, p_type#X, p_size#X, count#X] Arguments: hashpartitioning(p_brand#X, p_type#X, p_size#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(61) HashAggregate +(64) HashAggregate Input [4]: [p_brand#X, p_type#X, p_size#X, count#X] Keys [3]: [p_brand#X, p_type#X, p_size#X] Functions [1]: [count(distinct ps_suppkey#X)] Aggregate Attributes [1]: [count(ps_suppkey#X)#X] Results [4]: [p_brand#X, p_type#X, p_size#X, count(ps_suppkey#X)#X AS supplier_cnt#X] -(62) Exchange +(65) Exchange Input [4]: [p_brand#X, p_type#X, p_size#X, supplier_cnt#X] Arguments: rangepartitioning(supplier_cnt#X DESC NULLS LAST, p_brand#X ASC NULLS FIRST, p_type#X ASC NULLS FIRST, p_size#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(63) Sort +(66) Sort Input [4]: [p_brand#X, p_type#X, p_size#X, supplier_cnt#X] Arguments: [supplier_cnt#X DESC NULLS LAST, p_brand#X ASC NULLS FIRST, p_type#X ASC NULLS FIRST, p_size#X ASC NULLS FIRST], true, 0 -(64) AdaptiveSparkPlan +(67) AdaptiveSparkPlan Output [4]: [p_brand#X, p_type#X, p_size#X, supplier_cnt#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/17.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/17.txt index 48ba7ddab093..c270e7e068b5 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/17.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/17.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (57) +AdaptiveSparkPlan (63) +- == Final Plan == VeloxColumnarToRowExec (37) +- ^ ProjectExecTransformer (35) @@ -32,25 +32,31 @@ AdaptiveSparkPlan (57) +- ^ FilterExecTransformer (20) +- ^ Scan parquet (19) +- == Initial Plan == - HashAggregate (56) - +- HashAggregate (55) - +- Project (54) - +- ShuffledHashJoin Inner BuildRight (53) - :- Project (46) - : +- ShuffledHashJoin Inner BuildRight (45) - : :- Exchange (40) - : : +- Filter (39) - : : +- Scan parquet (38) - : +- Exchange (44) - : +- Project (43) - : +- Filter (42) - : +- Scan parquet (41) - +- Filter (52) - +- HashAggregate (51) - +- Exchange (50) - +- HashAggregate (49) - +- Filter (48) - +- Scan parquet (47) + HashAggregate (62) + +- HashAggregate (61) + +- Project (60) + +- ShuffledHashJoinExecTemp (59) + +- SortMergeJoin Inner (58) + :- Sort (50) + : +- Project (49) + : +- ShuffledHashJoinExecTemp (48) + : +- SortMergeJoin Inner (47) + : :- Sort (41) + : : +- Exchange (40) + : : +- Filter (39) + : : +- Scan parquet (38) + : +- Sort (46) + : +- Exchange (45) + : +- Project (44) + : +- Filter (43) + : +- Scan parquet (42) + +- Sort (57) + +- Filter (56) + +- HashAggregate (55) + +- Exchange (54) + +- HashAggregate (53) + +- Filter (52) + +- Scan parquet (51) (1) Scan parquet @@ -232,90 +238,114 @@ Condition : (isnotnull(l_partkey#X) AND isnotnull(l_quantity#X)) Input [3]: [l_partkey#X, l_quantity#X, l_extendedprice#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(41) Scan parquet +(41) Sort +Input [3]: [l_partkey#X, l_quantity#X, l_extendedprice#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(42) Scan parquet Output [3]: [p_partkey#X, p_brand#X, p_container#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_brand), IsNotNull(p_container), EqualTo(p_brand,Brand#X), EqualTo(p_container,MED BOX), IsNotNull(p_partkey)] ReadSchema: struct -(42) Filter +(43) Filter Input [3]: [p_partkey#X, p_brand#X, p_container#X] Condition : ((((isnotnull(p_brand#X) AND isnotnull(p_container#X)) AND (p_brand#X = Brand#X)) AND (p_container#X = MED BOX)) AND isnotnull(p_partkey#X)) -(43) Project +(44) Project Output [1]: [p_partkey#X] Input [3]: [p_partkey#X, p_brand#X, p_container#X] -(44) Exchange +(45) Exchange Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(45) ShuffledHashJoin +(46) Sort +Input [1]: [p_partkey#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(47) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(46) Project +(48) ShuffledHashJoinExecTemp +Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, p_partkey#X] +Arguments: BuildRight + +(49) Project Output [3]: [l_quantity#X, l_extendedprice#X, p_partkey#X] Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, p_partkey#X] -(47) Scan parquet +(50) Sort +Input [3]: [l_quantity#X, l_extendedprice#X, p_partkey#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(51) Scan parquet Output [2]: [l_partkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_partkey)] ReadSchema: struct -(48) Filter +(52) Filter Input [2]: [l_partkey#X, l_quantity#X] Condition : isnotnull(l_partkey#X) -(49) HashAggregate +(53) HashAggregate Input [2]: [l_partkey#X, l_quantity#X] Keys [1]: [l_partkey#X] Functions [1]: [partial_avg(l_quantity#X)] Aggregate Attributes [2]: [sum#X, count#X] Results [3]: [l_partkey#X, sum#X, count#X] -(50) Exchange +(54) Exchange Input [3]: [l_partkey#X, sum#X, count#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(51) HashAggregate +(55) HashAggregate Input [3]: [l_partkey#X, sum#X, count#X] Keys [1]: [l_partkey#X] Functions [1]: [avg(l_quantity#X)] Aggregate Attributes [1]: [avg(l_quantity#X)#X] Results [2]: [CheckOverflow((0.200000 * promote_precision(avg(l_quantity#X)#X)), DecimalType(18,7)) AS (0.2 * avg(l_quantity))#X, l_partkey#X] -(52) Filter +(56) Filter Input [2]: [(0.2 * avg(l_quantity))#X, l_partkey#X] Condition : isnotnull((0.2 * avg(l_quantity))#X) -(53) ShuffledHashJoin +(57) Sort +Input [2]: [(0.2 * avg(l_quantity))#X, l_partkey#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(58) SortMergeJoin Left keys [1]: [p_partkey#X] Right keys [1]: [l_partkey#X] Join condition: (cast(l_quantity#X as decimal(18,7)) < (0.2 * avg(l_quantity))#X) -(54) Project +(59) ShuffledHashJoinExecTemp +Input [5]: [l_quantity#X, l_extendedprice#X, p_partkey#X, (0.2 * avg(l_quantity))#X, l_partkey#X] +Arguments: BuildRight + +(60) Project Output [1]: [l_extendedprice#X] Input [5]: [l_quantity#X, l_extendedprice#X, p_partkey#X, (0.2 * avg(l_quantity))#X, l_partkey#X] -(55) HashAggregate +(61) HashAggregate Input [1]: [l_extendedprice#X] Keys: [] Functions [1]: [partial_sum(l_extendedprice#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [2]: [sum#X, isEmpty#X] -(56) HashAggregate +(62) HashAggregate Input [2]: [sum#X, isEmpty#X] Keys: [] Functions [1]: [sum(l_extendedprice#X)] Aggregate Attributes [1]: [sum(l_extendedprice#X)#X] Results [1]: [CheckOverflow((promote_precision(sum(l_extendedprice#X)#X) / 7.00), DecimalType(27,6)) AS avg_yearly#X] -(57) AdaptiveSparkPlan +(63) AdaptiveSparkPlan Output [1]: [avg_yearly#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/18.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/18.txt index a15a185bbd77..567eff23313e 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/18.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/18.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (97) +AdaptiveSparkPlan (109) +- == Final Plan == VeloxColumnarToRowExec (64) +- TakeOrderedAndProjectExecTransformer (63) @@ -52,38 +52,50 @@ AdaptiveSparkPlan (97) +- ShuffleQueryStage (51), Statistics(X) +- ReusedExchange (50) +- == Initial Plan == - TakeOrderedAndProject (96) - +- HashAggregate (95) - +- HashAggregate (94) - +- Project (93) - +- ShuffledHashJoin Inner BuildRight (92) - :- Exchange (81) - : +- Project (80) - : +- ShuffledHashJoin Inner BuildLeft (79) - : :- Exchange (67) - : : +- Filter (66) - : : +- Scan parquet (65) - : +- Exchange (78) - : +- ShuffledHashJoin LeftSemi BuildRight (77) - : :- Exchange (70) - : : +- Filter (69) - : : +- Scan parquet (68) - : +- Project (76) - : +- Filter (75) - : +- HashAggregate (74) - : +- Exchange (73) - : +- HashAggregate (72) - : +- Scan parquet (71) - +- ShuffledHashJoin LeftSemi BuildRight (91) - :- Exchange (84) - : +- Filter (83) - : +- Scan parquet (82) - +- Project (90) - +- Filter (89) - +- HashAggregate (88) - +- Exchange (87) - +- HashAggregate (86) - +- Scan parquet (85) + TakeOrderedAndProject (108) + +- HashAggregate (107) + +- HashAggregate (106) + +- Project (105) + +- ShuffledHashJoinExecTemp (104) + +- SortMergeJoin Inner (103) + :- Sort (88) + : +- Exchange (87) + : +- Project (86) + : +- ShuffledHashJoinExecTemp (85) + : +- SortMergeJoin Inner (84) + : :- Sort (68) + : : +- Exchange (67) + : : +- Filter (66) + : : +- Scan parquet (65) + : +- Sort (83) + : +- Exchange (82) + : +- ShuffledHashJoinExecTemp (81) + : +- SortMergeJoin LeftSemi (80) + : :- Sort (72) + : : +- Exchange (71) + : : +- Filter (70) + : : +- Scan parquet (69) + : +- Sort (79) + : +- Project (78) + : +- Filter (77) + : +- HashAggregate (76) + : +- Exchange (75) + : +- HashAggregate (74) + : +- Scan parquet (73) + +- Sort (102) + +- ShuffledHashJoinExecTemp (101) + +- SortMergeJoin LeftSemi (100) + :- Sort (92) + : +- Exchange (91) + : +- Filter (90) + : +- Scan parquet (89) + +- Sort (99) + +- Project (98) + +- Filter (97) + +- HashAggregate (96) + +- Exchange (95) + +- HashAggregate (94) + +- Scan parquet (93) (1) Scan parquet @@ -371,154 +383,202 @@ Condition : isnotnull(c_custkey#X) Input [2]: [c_custkey#X, c_name#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(68) Scan parquet +(68) Sort +Input [2]: [c_custkey#X, c_name#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(69) Scan parquet Output [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_custkey), IsNotNull(o_orderkey)] ReadSchema: struct -(69) Filter +(70) Filter Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Condition : (isnotnull(o_custkey#X) AND isnotnull(o_orderkey#X)) -(70) Exchange +(71) Exchange Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(71) Scan parquet +(72) Sort +Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(73) Scan parquet Output [2]: [l_orderkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(72) HashAggregate +(74) HashAggregate Input [2]: [l_orderkey#X, l_quantity#X] Keys [1]: [l_orderkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_orderkey#X, sum#X, isEmpty#X] -(73) Exchange +(75) Exchange Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(74) HashAggregate +(76) HashAggregate Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Keys [1]: [l_orderkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [2]: [l_orderkey#X, sum(l_quantity#X)#X AS sum(l_quantity#X)#X] -(75) Filter +(77) Filter Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] Condition : (isnotnull(sum(l_quantity#X)#X) AND (sum(l_quantity#X)#X > 300.00)) -(76) Project +(78) Project Output [1]: [l_orderkey#X] Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] -(77) ShuffledHashJoin +(79) Sort +Input [1]: [l_orderkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(80) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(78) Exchange +(81) ShuffledHashJoinExecTemp +Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] +Arguments: BuildRight + +(82) Exchange Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(79) ShuffledHashJoin +(83) Sort +Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(84) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join condition: None -(80) Project +(85) ShuffledHashJoinExecTemp +Input [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] +Arguments: BuildLeft + +(86) Project Output [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] Input [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] -(81) Exchange +(87) Exchange Input [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(82) Scan parquet +(88) Sort +Input [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(89) Scan parquet Output [2]: [l_orderkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_orderkey)] ReadSchema: struct -(83) Filter +(90) Filter Input [2]: [l_orderkey#X, l_quantity#X] Condition : isnotnull(l_orderkey#X) -(84) Exchange +(91) Exchange Input [2]: [l_orderkey#X, l_quantity#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(85) Scan parquet +(92) Sort +Input [2]: [l_orderkey#X, l_quantity#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(93) Scan parquet Output [2]: [l_orderkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(86) HashAggregate +(94) HashAggregate Input [2]: [l_orderkey#X, l_quantity#X] Keys [1]: [l_orderkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_orderkey#X, sum#X, isEmpty#X] -(87) Exchange +(95) Exchange Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(88) HashAggregate +(96) HashAggregate Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Keys [1]: [l_orderkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [2]: [l_orderkey#X, sum(l_quantity#X)#X AS sum(l_quantity#X)#X] -(89) Filter +(97) Filter Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] Condition : (isnotnull(sum(l_quantity#X)#X) AND (sum(l_quantity#X)#X > 300.00)) -(90) Project +(98) Project Output [1]: [l_orderkey#X] Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] -(91) ShuffledHashJoin +(99) Sort +Input [1]: [l_orderkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(100) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(92) ShuffledHashJoin +(101) ShuffledHashJoinExecTemp +Input [2]: [l_orderkey#X, l_quantity#X] +Arguments: BuildRight + +(102) Sort +Input [2]: [l_orderkey#X, l_quantity#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(103) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(93) Project +(104) ShuffledHashJoinExecTemp +Input [7]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_orderkey#X, l_quantity#X] +Arguments: BuildRight + +(105) Project Output [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_quantity#X] Input [7]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_orderkey#X, l_quantity#X] -(94) HashAggregate +(106) HashAggregate Input [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_quantity#X] Keys [5]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [7]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] -(95) HashAggregate +(107) HashAggregate Input [7]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] Keys [5]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity#X)#X AS sum(l_quantity)#X] -(96) TakeOrderedAndProject +(108) TakeOrderedAndProject Input [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] Arguments: X, [o_totalprice#X DESC NULLS LAST, o_orderdate#X ASC NULLS FIRST], [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] -(97) AdaptiveSparkPlan +(109) AdaptiveSparkPlan Output [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/19.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/19.txt index 80f0189d5d65..bf9356081d69 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/19.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/19.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (34) +AdaptiveSparkPlan (37) +- == Final Plan == VeloxColumnarToRowExec (22) +- ^ RegularHashAggregateExecTransformer (20) @@ -19,17 +19,20 @@ AdaptiveSparkPlan (34) +- ^ FilterExecTransformer (10) +- ^ Scan parquet (9) +- == Initial Plan == - HashAggregate (33) - +- HashAggregate (32) - +- Project (31) - +- ShuffledHashJoin Inner BuildRight (30) - :- Exchange (26) - : +- Project (25) - : +- Filter (24) - : +- Scan parquet (23) - +- Exchange (29) - +- Filter (28) - +- Scan parquet (27) + HashAggregate (36) + +- HashAggregate (35) + +- Project (34) + +- ShuffledHashJoinExecTemp (33) + +- SortMergeJoin Inner (32) + :- Sort (27) + : +- Exchange (26) + : +- Project (25) + : +- Filter (24) + : +- Scan parquet (23) + +- Sort (31) + +- Exchange (30) + +- Filter (29) + +- Scan parquet (28) (1) Scan parquet @@ -147,44 +150,56 @@ Input [6]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, l_shipin Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(27) Scan parquet +(27) Sort +Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(28) Scan parquet Output [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_size), GreaterThanOrEqual(p_size,1), IsNotNull(p_partkey), Or(Or(And(And(EqualTo(p_brand,Brand#X),In(p_container, [SM BOX,SM CASE,SM PACK,SM PKG])),LessThanOrEqual(p_size,5)),And(And(EqualTo(p_brand,Brand#X),In(p_container, [MED BAG,MED BOX,MED PACK,MED PKG])),LessThanOrEqual(p_size,10))),And(And(EqualTo(p_brand,Brand#X),In(p_container, [LG BOX,LG CASE,LG PACK,LG PKG])),LessThanOrEqual(p_size,15)))] ReadSchema: struct -(28) Filter +(29) Filter Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Condition : (((isnotnull(p_size#X) AND (p_size#X >= 1)) AND isnotnull(p_partkey#X)) AND (((((p_brand#X = Brand#X) AND p_container#X IN (SM CASE,SM BOX,SM PACK,SM PKG)) AND (p_size#X <= 5)) OR (((p_brand#X = Brand#X) AND p_container#X IN (MED BAG,MED BOX,MED PKG,MED PACK)) AND (p_size#X <= 10))) OR (((p_brand#X = Brand#X) AND p_container#X IN (LG CASE,LG BOX,LG PACK,LG PKG)) AND (p_size#X <= 15)))) -(29) Exchange +(30) Exchange Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(30) ShuffledHashJoin +(31) Sort +Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(32) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join condition: (((((((p_brand#X = Brand#X) AND p_container#X IN (SM CASE,SM BOX,SM PACK,SM PKG)) AND (l_quantity#X >= 1.00)) AND (l_quantity#X <= 11.00)) AND (p_size#X <= 5)) OR (((((p_brand#X = Brand#X) AND p_container#X IN (MED BAG,MED BOX,MED PKG,MED PACK)) AND (l_quantity#X >= 10.00)) AND (l_quantity#X <= 20.00)) AND (p_size#X <= 10))) OR (((((p_brand#X = Brand#X) AND p_container#X IN (LG CASE,LG BOX,LG PACK,LG PKG)) AND (l_quantity#X >= 20.00)) AND (l_quantity#X <= 30.00)) AND (p_size#X <= 15))) -(31) Project +(33) ShuffledHashJoinExecTemp +Input [8]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, p_partkey#X, p_brand#X, p_size#X, p_container#X] +Arguments: BuildRight + +(34) Project Output [2]: [l_extendedprice#X, l_discount#X] Input [8]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, p_partkey#X, p_brand#X, p_size#X, p_container#X] -(32) HashAggregate +(35) HashAggregate Input [2]: [l_extendedprice#X, l_discount#X] Keys: [] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [2]: [sum#X, isEmpty#X] -(33) HashAggregate +(36) HashAggregate Input [2]: [sum#X, isEmpty#X] Keys: [] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] Results [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X AS revenue#X] -(34) AdaptiveSparkPlan +(37) AdaptiveSparkPlan Output [1]: [revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/20.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/20.txt index a0dcc7029a35..f2dbc19104f3 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/20.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/20.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (123) +AdaptiveSparkPlan (138) +- == Final Plan == VeloxColumnarToRowExec (83) +- AQEShuffleRead (82) @@ -65,45 +65,60 @@ AdaptiveSparkPlan (123) +- ^ FilterExecTransformer (70) +- ^ Scan parquet (69) +- == Initial Plan == - Sort (122) - +- Exchange (121) - +- Project (120) - +- ShuffledHashJoin Inner BuildRight (119) - :- Exchange (114) - : +- Project (113) - : +- ShuffledHashJoin LeftSemi BuildRight (112) - : :- Exchange (86) - : : +- Filter (85) - : : +- Scan parquet (84) - : +- Exchange (111) - : +- Project (110) - : +- ShuffledHashJoin Inner BuildLeft (109) - : :- Exchange (95) - : : +- ShuffledHashJoin LeftSemi BuildRight (94) - : : :- Exchange (89) - : : : +- Filter (88) - : : : +- Scan parquet (87) - : : +- Exchange (93) - : : +- Project (92) - : : +- Filter (91) - : : +- Scan parquet (90) - : +- Exchange (108) - : +- Filter (107) - : +- HashAggregate (106) - : +- HashAggregate (105) - : +- ShuffledHashJoin LeftSemi BuildRight (104) - : :- Exchange (99) - : : +- Project (98) - : : +- Filter (97) - : : +- Scan parquet (96) - : +- Exchange (103) - : +- Project (102) - : +- Filter (101) - : +- Scan parquet (100) - +- Exchange (118) - +- Project (117) - +- Filter (116) - +- Scan parquet (115) + Sort (137) + +- Exchange (136) + +- Project (135) + +- ShuffledHashJoinExecTemp (134) + +- SortMergeJoin Inner (133) + :- Sort (127) + : +- Exchange (126) + : +- Project (125) + : +- ShuffledHashJoinExecTemp (124) + : +- SortMergeJoin LeftSemi (123) + : :- Sort (87) + : : +- Exchange (86) + : : +- Filter (85) + : : +- Scan parquet (84) + : +- Sort (122) + : +- Exchange (121) + : +- Project (120) + : +- ShuffledHashJoinExecTemp (119) + : +- SortMergeJoin Inner (118) + : :- Sort (100) + : : +- Exchange (99) + : : +- ShuffledHashJoinExecTemp (98) + : : +- SortMergeJoin LeftSemi (97) + : : :- Sort (91) + : : : +- Exchange (90) + : : : +- Filter (89) + : : : +- Scan parquet (88) + : : +- Sort (96) + : : +- Exchange (95) + : : +- Project (94) + : : +- Filter (93) + : : +- Scan parquet (92) + : +- Sort (117) + : +- Exchange (116) + : +- Filter (115) + : +- HashAggregate (114) + : +- HashAggregate (113) + : +- ShuffledHashJoinExecTemp (112) + : +- SortMergeJoin LeftSemi (111) + : :- Sort (105) + : : +- Exchange (104) + : : +- Project (103) + : : +- Filter (102) + : : +- Scan parquet (101) + : +- Sort (110) + : +- Exchange (109) + : +- Project (108) + : +- Filter (107) + : +- Scan parquet (106) + +- Sort (132) + +- Exchange (131) + +- Project (130) + +- Filter (129) + +- Scan parquet (128) (1) Scan parquet @@ -457,176 +472,236 @@ Condition : isnotnull(s_nationkey#X) Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(87) Scan parquet +(87) Sort +Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(88) Scan parquet Output [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(ps_availqty), IsNotNull(ps_partkey), IsNotNull(ps_suppkey)] ReadSchema: struct -(88) Filter +(89) Filter Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Condition : ((isnotnull(ps_availqty#X) AND isnotnull(ps_partkey#X)) AND isnotnull(ps_suppkey#X)) -(89) Exchange +(90) Exchange Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Arguments: hashpartitioning(ps_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(90) Scan parquet +(91) Sort +Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] +Arguments: [ps_partkey#X ASC NULLS FIRST], false, 0 + +(92) Scan parquet Output [2]: [p_partkey#X, p_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_name), StringStartsWith(p_name,forest)] ReadSchema: struct -(91) Filter +(93) Filter Input [2]: [p_partkey#X, p_name#X] Condition : (isnotnull(p_name#X) AND StartsWith(p_name#X, forest)) -(92) Project +(94) Project Output [1]: [p_partkey#X] Input [2]: [p_partkey#X, p_name#X] -(93) Exchange +(95) Exchange Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(94) ShuffledHashJoin +(96) Sort +Input [1]: [p_partkey#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(97) SortMergeJoin Left keys [1]: [ps_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(95) Exchange +(98) ShuffledHashJoinExecTemp +Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] +Arguments: BuildRight + +(99) Exchange Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Arguments: hashpartitioning(ps_partkey#X, ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(96) Scan parquet +(100) Sort +Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] +Arguments: [ps_partkey#X ASC NULLS FIRST, ps_suppkey#X ASC NULLS FIRST], false, 0 + +(101) Scan parquet Output [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1994-01-01), LessThan(l_shipdate,1995-01-01), IsNotNull(l_partkey), IsNotNull(l_suppkey)] ReadSchema: struct -(97) Filter +(102) Filter Input [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] Condition : ((((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1994-01-01)) AND (l_shipdate#X < 1995-01-01)) AND isnotnull(l_partkey#X)) AND isnotnull(l_suppkey#X)) -(98) Project +(103) Project Output [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Input [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] -(99) Exchange +(104) Exchange Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(100) Scan parquet +(105) Sort +Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(106) Scan parquet Output [2]: [p_partkey#X, p_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_name), StringStartsWith(p_name,forest)] ReadSchema: struct -(101) Filter +(107) Filter Input [2]: [p_partkey#X, p_name#X] Condition : (isnotnull(p_name#X) AND StartsWith(p_name#X, forest)) -(102) Project +(108) Project Output [1]: [p_partkey#X] Input [2]: [p_partkey#X, p_name#X] -(103) Exchange +(109) Exchange Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(104) ShuffledHashJoin +(110) Sort +Input [1]: [p_partkey#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(111) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(105) HashAggregate +(112) ShuffledHashJoinExecTemp +Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] +Arguments: BuildRight + +(113) HashAggregate Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Keys [2]: [l_partkey#X, l_suppkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] -(106) HashAggregate +(114) HashAggregate Input [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] Keys [2]: [l_partkey#X, l_suppkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [3]: [CheckOverflow((0.50 * promote_precision(sum(l_quantity#X)#X)), DecimalType(24,3)) AS (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(107) Filter +(115) Filter Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Condition : isnotnull((0.5 * sum(l_quantity))#X) -(108) Exchange +(116) Exchange Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Arguments: hashpartitioning(l_partkey#X, l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(109) ShuffledHashJoin +(117) Sort +Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] +Arguments: [l_partkey#X ASC NULLS FIRST, l_suppkey#X ASC NULLS FIRST], false, 0 + +(118) SortMergeJoin Left keys [2]: [ps_partkey#X, ps_suppkey#X] Right keys [2]: [l_partkey#X, l_suppkey#X] Join condition: (cast(ps_availqty#X as decimal(24,3)) > (0.5 * sum(l_quantity))#X) -(110) Project +(119) ShuffledHashJoinExecTemp +Input [6]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] +Arguments: BuildLeft + +(120) Project Output [1]: [ps_suppkey#X] Input [6]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(111) Exchange +(121) Exchange Input [1]: [ps_suppkey#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(112) ShuffledHashJoin +(122) Sort +Input [1]: [ps_suppkey#X] +Arguments: [ps_suppkey#X ASC NULLS FIRST], false, 0 + +(123) SortMergeJoin Left keys [1]: [s_suppkey#X] Right keys [1]: [ps_suppkey#X] Join condition: None -(113) Project +(124) ShuffledHashJoinExecTemp +Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] +Arguments: BuildRight + +(125) Project Output [3]: [s_name#X, s_address#X, s_nationkey#X] Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] -(114) Exchange +(126) Exchange Input [3]: [s_name#X, s_address#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(115) Scan parquet +(127) Sort +Input [3]: [s_name#X, s_address#X, s_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(128) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,CANADA), IsNotNull(n_nationkey)] ReadSchema: struct -(116) Filter +(129) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = CANADA)) AND isnotnull(n_nationkey#X)) -(117) Project +(130) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(118) Exchange +(131) Exchange Input [1]: [n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(119) ShuffledHashJoin +(132) Sort +Input [1]: [n_nationkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(133) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(120) Project +(134) ShuffledHashJoinExecTemp +Input [4]: [s_name#X, s_address#X, s_nationkey#X, n_nationkey#X] +Arguments: BuildRight + +(135) Project Output [2]: [s_name#X, s_address#X] Input [4]: [s_name#X, s_address#X, s_nationkey#X, n_nationkey#X] -(121) Exchange +(136) Exchange Input [2]: [s_name#X, s_address#X] Arguments: rangepartitioning(s_name#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(122) Sort +(137) Sort Input [2]: [s_name#X, s_address#X] Arguments: [s_name#X ASC NULLS FIRST], true, 0 -(123) AdaptiveSparkPlan +(138) AdaptiveSparkPlan Output [2]: [s_name#X, s_address#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/21.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/21.txt index 3635363cfe47..c52e349fa4ef 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/21.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/21.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (118) +AdaptiveSparkPlan (133) +- == Final Plan == VeloxColumnarToRowExec (81) +- ^ RegularHashAggregateExecTransformer (79) @@ -62,42 +62,57 @@ AdaptiveSparkPlan (118) +- ^ FilterExecTransformer (63) +- ^ Scan parquet (62) +- == Initial Plan == - TakeOrderedAndProject (117) - +- HashAggregate (116) - +- Exchange (115) - +- HashAggregate (114) - +- Project (113) - +- ShuffledHashJoin Inner BuildRight (112) - :- Exchange (107) - : +- Project (106) - : +- ShuffledHashJoin Inner BuildRight (105) - : :- Exchange (100) - : : +- Project (99) - : : +- ShuffledHashJoin Inner BuildLeft (98) - : : :- Exchange (84) - : : : +- Filter (83) - : : : +- Scan parquet (82) - : : +- Exchange (97) - : : +- ShuffledHashJoin LeftAnti BuildRight (96) - : : :- ShuffledHashJoin LeftSemi BuildRight (91) - : : : :- Exchange (88) - : : : : +- Project (87) - : : : : +- Filter (86) - : : : : +- Scan parquet (85) - : : : +- Exchange (90) - : : : +- Scan parquet (89) - : : +- Exchange (95) - : : +- Project (94) - : : +- Filter (93) - : : +- Scan parquet (92) - : +- Exchange (104) - : +- Project (103) - : +- Filter (102) - : +- Scan parquet (101) - +- Exchange (111) - +- Project (110) - +- Filter (109) - +- Scan parquet (108) + TakeOrderedAndProject (132) + +- HashAggregate (131) + +- Exchange (130) + +- HashAggregate (129) + +- Project (128) + +- ShuffledHashJoinExecTemp (127) + +- SortMergeJoin Inner (126) + :- Sort (120) + : +- Exchange (119) + : +- Project (118) + : +- ShuffledHashJoinExecTemp (117) + : +- SortMergeJoin Inner (116) + : :- Sort (110) + : : +- Exchange (109) + : : +- Project (108) + : : +- ShuffledHashJoinExecTemp (107) + : : +- SortMergeJoin Inner (106) + : : :- Sort (85) + : : : +- Exchange (84) + : : : +- Filter (83) + : : : +- Scan parquet (82) + : : +- Sort (105) + : : +- Exchange (104) + : : +- ShuffledHashJoinExecTemp (103) + : : +- SortMergeJoin LeftAnti (102) + : : :- Sort (96) + : : : +- ShuffledHashJoinExecTemp (95) + : : : +- SortMergeJoin LeftSemi (94) + : : : :- Sort (90) + : : : : +- Exchange (89) + : : : : +- Project (88) + : : : : +- Filter (87) + : : : : +- Scan parquet (86) + : : : +- Sort (93) + : : : +- Exchange (92) + : : : +- Scan parquet (91) + : : +- Sort (101) + : : +- Exchange (100) + : : +- Project (99) + : : +- Filter (98) + : : +- Scan parquet (97) + : +- Sort (115) + : +- Exchange (114) + : +- Project (113) + : +- Filter (112) + : +- Scan parquet (111) + +- Sort (125) + +- Exchange (124) + +- Project (123) + +- Filter (122) + +- Scan parquet (121) (1) Scan parquet @@ -446,163 +461,223 @@ Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) Input [3]: [s_suppkey#X, s_name#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(85) Scan parquet +(85) Sort +Input [3]: [s_suppkey#X, s_name#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(86) Scan parquet Output [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_receiptdate), IsNotNull(l_commitdate), IsNotNull(l_suppkey), IsNotNull(l_orderkey)] ReadSchema: struct -(86) Filter +(87) Filter Input [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] Condition : ((((isnotnull(l_receiptdate#X) AND isnotnull(l_commitdate#X)) AND (l_receiptdate#X > l_commitdate#X)) AND isnotnull(l_suppkey#X)) AND isnotnull(l_orderkey#X)) -(87) Project +(88) Project Output [2]: [l_orderkey#X, l_suppkey#X] Input [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] -(88) Exchange +(89) Exchange Input [2]: [l_orderkey#X, l_suppkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(89) Scan parquet +(90) Sort +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(91) Scan parquet Output [2]: [l_orderkey#X, l_suppkey#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(90) Exchange +(92) Exchange Input [2]: [l_orderkey#X, l_suppkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(91) ShuffledHashJoin +(93) Sort +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(94) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: NOT (l_suppkey#X = l_suppkey#X) -(92) Scan parquet +(95) ShuffledHashJoinExecTemp +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: BuildRight + +(96) Sort +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(97) Scan parquet Output [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_receiptdate), IsNotNull(l_commitdate)] ReadSchema: struct -(93) Filter +(98) Filter Input [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] Condition : ((isnotnull(l_receiptdate#X) AND isnotnull(l_commitdate#X)) AND (l_receiptdate#X > l_commitdate#X)) -(94) Project +(99) Project Output [2]: [l_orderkey#X, l_suppkey#X] Input [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] -(95) Exchange +(100) Exchange Input [2]: [l_orderkey#X, l_suppkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(96) ShuffledHashJoin +(101) Sort +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(102) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: NOT (l_suppkey#X = l_suppkey#X) -(97) Exchange +(103) ShuffledHashJoinExecTemp +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: BuildRight + +(104) Exchange Input [2]: [l_orderkey#X, l_suppkey#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(98) ShuffledHashJoin +(105) Sort +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: [l_suppkey#X ASC NULLS FIRST], false, 0 + +(106) SortMergeJoin Left keys [1]: [s_suppkey#X] Right keys [1]: [l_suppkey#X] Join condition: None -(99) Project +(107) ShuffledHashJoinExecTemp +Input [5]: [s_suppkey#X, s_name#X, s_nationkey#X, l_orderkey#X, l_suppkey#X] +Arguments: BuildLeft + +(108) Project Output [3]: [s_name#X, s_nationkey#X, l_orderkey#X] Input [5]: [s_suppkey#X, s_name#X, s_nationkey#X, l_orderkey#X, l_suppkey#X] -(100) Exchange +(109) Exchange Input [3]: [s_name#X, s_nationkey#X, l_orderkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(101) Scan parquet +(110) Sort +Input [3]: [s_name#X, s_nationkey#X, l_orderkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(111) Scan parquet Output [2]: [o_orderkey#X, o_orderstatus#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderstatus), EqualTo(o_orderstatus,F), IsNotNull(o_orderkey)] ReadSchema: struct -(102) Filter +(112) Filter Input [2]: [o_orderkey#X, o_orderstatus#X] Condition : ((isnotnull(o_orderstatus#X) AND (o_orderstatus#X = F)) AND isnotnull(o_orderkey#X)) -(103) Project +(113) Project Output [1]: [o_orderkey#X] Input [2]: [o_orderkey#X, o_orderstatus#X] -(104) Exchange +(114) Exchange Input [1]: [o_orderkey#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(105) ShuffledHashJoin +(115) Sort +Input [1]: [o_orderkey#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(116) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [o_orderkey#X] Join condition: None -(106) Project +(117) ShuffledHashJoinExecTemp +Input [4]: [s_name#X, s_nationkey#X, l_orderkey#X, o_orderkey#X] +Arguments: BuildRight + +(118) Project Output [2]: [s_name#X, s_nationkey#X] Input [4]: [s_name#X, s_nationkey#X, l_orderkey#X, o_orderkey#X] -(107) Exchange +(119) Exchange Input [2]: [s_name#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(108) Scan parquet +(120) Sort +Input [2]: [s_name#X, s_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(121) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,SAUDI ARABIA), IsNotNull(n_nationkey)] ReadSchema: struct -(109) Filter +(122) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = SAUDI ARABIA)) AND isnotnull(n_nationkey#X)) -(110) Project +(123) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(111) Exchange +(124) Exchange Input [1]: [n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(112) ShuffledHashJoin +(125) Sort +Input [1]: [n_nationkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(126) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(113) Project +(127) ShuffledHashJoinExecTemp +Input [3]: [s_name#X, s_nationkey#X, n_nationkey#X] +Arguments: BuildRight + +(128) Project Output [1]: [s_name#X] Input [3]: [s_name#X, s_nationkey#X, n_nationkey#X] -(114) HashAggregate +(129) HashAggregate Input [1]: [s_name#X] Keys [1]: [s_name#X] Functions [1]: [partial_count(1)] Aggregate Attributes [1]: [count#X] Results [2]: [s_name#X, count#X] -(115) Exchange +(130) Exchange Input [2]: [s_name#X, count#X] Arguments: hashpartitioning(s_name#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(116) HashAggregate +(131) HashAggregate Input [2]: [s_name#X, count#X] Keys [1]: [s_name#X] Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#X] Results [2]: [s_name#X, count(1)#X AS numwait#X] -(117) TakeOrderedAndProject +(132) TakeOrderedAndProject Input [2]: [s_name#X, numwait#X] Arguments: X, [numwait#X DESC NULLS LAST, s_name#X ASC NULLS FIRST], [s_name#X, numwait#X] -(118) AdaptiveSparkPlan +(133) AdaptiveSparkPlan Output [2]: [s_name#X, numwait#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/22.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/22.txt index 3c18ab436ed2..dd884c4cd149 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/22.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/22.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (46) +AdaptiveSparkPlan (49) +- == Final Plan == VeloxColumnarToRowExec (33) +- ^ SortExecTransformer (31) @@ -26,18 +26,21 @@ AdaptiveSparkPlan (46) +- ^ ProjectExecTransformer (10) +- ^ Scan parquet (9) +- == Initial Plan == - Sort (45) - +- Exchange (44) - +- HashAggregate (43) - +- Exchange (42) - +- HashAggregate (41) - +- Project (40) - +- ShuffledHashJoin LeftAnti BuildRight (39) - :- Exchange (36) - : +- Filter (35) - : +- Scan parquet (34) - +- Exchange (38) - +- Scan parquet (37) + Sort (48) + +- Exchange (47) + +- HashAggregate (46) + +- Exchange (45) + +- HashAggregate (44) + +- Project (43) + +- ShuffledHashJoinExecTemp (42) + +- SortMergeJoin LeftAnti (41) + :- Sort (37) + : +- Exchange (36) + : +- Filter (35) + : +- Scan parquet (34) + +- Sort (40) + +- Exchange (39) + +- Scan parquet (38) (1) Scan parquet @@ -190,185 +193,197 @@ Condition : ((isnotnull(c_acctbal#X) AND substring(c_phone#X, 1, 2) IN (13,31,23 Input [3]: [c_custkey#X, c_phone#X, c_acctbal#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(37) Scan parquet +(37) Sort +Input [3]: [c_custkey#X, c_phone#X, c_acctbal#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(38) Scan parquet Output [1]: [o_custkey#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(38) Exchange +(39) Exchange Input [1]: [o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(39) ShuffledHashJoin +(40) Sort +Input [1]: [o_custkey#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(41) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join condition: None -(40) Project +(42) ShuffledHashJoinExecTemp +Input [3]: [c_custkey#X, c_phone#X, c_acctbal#X] +Arguments: BuildRight + +(43) Project Output [2]: [substring(c_phone#X, 1, 2) AS cntrycode#X, c_acctbal#X] Input [3]: [c_custkey#X, c_phone#X, c_acctbal#X] -(41) HashAggregate +(44) HashAggregate Input [2]: [cntrycode#X, c_acctbal#X] Keys [1]: [cntrycode#X] Functions [2]: [partial_count(1), partial_sum(c_acctbal#X)] Aggregate Attributes [3]: [count#X, sum#X, isEmpty#X] Results [4]: [cntrycode#X, count#X, sum#X, isEmpty#X] -(42) Exchange +(45) Exchange Input [4]: [cntrycode#X, count#X, sum#X, isEmpty#X] Arguments: hashpartitioning(cntrycode#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(43) HashAggregate +(46) HashAggregate Input [4]: [cntrycode#X, count#X, sum#X, isEmpty#X] Keys [1]: [cntrycode#X] Functions [2]: [count(1), sum(c_acctbal#X)] Aggregate Attributes [2]: [count(1)#X, sum(c_acctbal#X)#X] Results [3]: [cntrycode#X, count(1)#X AS numcust#X, sum(c_acctbal#X)#X AS totacctbal#X] -(44) Exchange +(47) Exchange Input [3]: [cntrycode#X, numcust#X, totacctbal#X] Arguments: rangepartitioning(cntrycode#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(45) Sort +(48) Sort Input [3]: [cntrycode#X, numcust#X, totacctbal#X] Arguments: [cntrycode#X ASC NULLS FIRST], true, 0 -(46) AdaptiveSparkPlan +(49) AdaptiveSparkPlan Output [3]: [cntrycode#X, numcust#X, totacctbal#X] Arguments: isFinalPlan=true ===== Subqueries ===== Subquery:1 Hosting operator id = 2 Hosting Expression = Subquery subquery#X, [id=#X] -AdaptiveSparkPlan (65) +AdaptiveSparkPlan (68) +- == Final Plan == - VeloxColumnarToRowExec (58) - +- ^ RegularHashAggregateExecTransformer (56) - +- ^ InputIteratorTransformer (55) - +- ShuffleQueryStage (53), Statistics(X) - +- ColumnarExchange (52) - +- ^ FlushableHashAggregateExecTransformer (50) - +- ^ ProjectExecTransformer (49) - +- ^ FilterExecTransformer (48) - +- ^ Scan parquet (47) + VeloxColumnarToRowExec (61) + +- ^ RegularHashAggregateExecTransformer (59) + +- ^ InputIteratorTransformer (58) + +- ShuffleQueryStage (56), Statistics(X) + +- ColumnarExchange (55) + +- ^ FlushableHashAggregateExecTransformer (53) + +- ^ ProjectExecTransformer (52) + +- ^ FilterExecTransformer (51) + +- ^ Scan parquet (50) +- == Initial Plan == - HashAggregate (64) - +- Exchange (63) - +- HashAggregate (62) - +- Project (61) - +- Filter (60) - +- Scan parquet (59) + HashAggregate (67) + +- Exchange (66) + +- HashAggregate (65) + +- Project (64) + +- Filter (63) + +- Scan parquet (62) -(47) Scan parquet +(50) Scan parquet Output [2]: [c_phone#X, c_acctbal#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(c_acctbal), GreaterThan(c_acctbal,0.00)] ReadSchema: struct -(48) FilterExecTransformer +(51) FilterExecTransformer Input [2]: [c_phone#X, c_acctbal#X] Arguments: ((isnotnull(c_acctbal#X) AND (c_acctbal#X > 0.00)) AND substring(c_phone#X, 1, 2) IN (13,31,23,29,30,18,17)) -(49) ProjectExecTransformer +(52) ProjectExecTransformer Output [1]: [c_acctbal#X] Input [2]: [c_phone#X, c_acctbal#X] -(50) FlushableHashAggregateExecTransformer +(53) FlushableHashAggregateExecTransformer Input [1]: [c_acctbal#X] Keys: [] Functions [1]: [partial_avg(c_acctbal#X)] Aggregate Attributes [2]: [sum#X, count#X] Results [2]: [sum#X, count#X] -(51) WholeStageCodegenTransformer (X) +(54) WholeStageCodegenTransformer (X) Input [2]: [sum#X, count#X] Arguments: false -(52) ColumnarExchange +(55) ColumnarExchange Input [2]: [sum#X, count#X] Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=X], [id=#X] -(53) ShuffleQueryStage +(56) ShuffleQueryStage Output [2]: [sum#X, count#X] Arguments: X -(54) InputAdapter +(57) InputAdapter Input [2]: [sum#X, count#X] -(55) InputIteratorTransformer +(58) InputIteratorTransformer Input [2]: [sum#X, count#X] -(56) RegularHashAggregateExecTransformer +(59) RegularHashAggregateExecTransformer Input [2]: [sum#X, count#X] Keys: [] Functions [1]: [avg(c_acctbal#X)] Aggregate Attributes [1]: [avg(c_acctbal#X)#X] Results [1]: [avg(c_acctbal#X)#X AS avg(c_acctbal)#X] -(57) WholeStageCodegenTransformer (X) +(60) WholeStageCodegenTransformer (X) Input [1]: [avg(c_acctbal)#X] Arguments: false -(58) VeloxColumnarToRowExec +(61) VeloxColumnarToRowExec Input [1]: [avg(c_acctbal)#X] -(59) Scan parquet +(62) Scan parquet Output [2]: [c_phone#X, c_acctbal#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(c_acctbal), GreaterThan(c_acctbal,0.00)] ReadSchema: struct -(60) Filter +(63) Filter Input [2]: [c_phone#X, c_acctbal#X] Condition : ((isnotnull(c_acctbal#X) AND (c_acctbal#X > 0.00)) AND substring(c_phone#X, 1, 2) IN (13,31,23,29,30,18,17)) -(61) Project +(64) Project Output [1]: [c_acctbal#X] Input [2]: [c_phone#X, c_acctbal#X] -(62) HashAggregate +(65) HashAggregate Input [1]: [c_acctbal#X] Keys: [] Functions [1]: [partial_avg(c_acctbal#X)] Aggregate Attributes [2]: [sum#X, count#X] Results [2]: [sum#X, count#X] -(63) Exchange +(66) Exchange Input [2]: [sum#X, count#X] Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=X] -(64) HashAggregate +(67) HashAggregate Input [2]: [sum#X, count#X] Keys: [] Functions [1]: [avg(c_acctbal#X)] Aggregate Attributes [1]: [avg(c_acctbal#X)#X] Results [1]: [avg(c_acctbal#X)#X AS avg(c_acctbal)#X] -(65) AdaptiveSparkPlan +(68) AdaptiveSparkPlan Output [1]: [avg(c_acctbal)#X] Arguments: isFinalPlan=true Subquery:2 Hosting operator id = 1 Hosting Expression = Subquery subquery#X, [id=#X] -AdaptiveSparkPlan (65) +AdaptiveSparkPlan (68) +- == Final Plan == - VeloxColumnarToRowExec (58) - +- ^ RegularHashAggregateExecTransformer (56) - +- ^ InputIteratorTransformer (55) - +- ShuffleQueryStage (53), Statistics(X) - +- ColumnarExchange (52) - +- ^ FlushableHashAggregateExecTransformer (50) - +- ^ ProjectExecTransformer (49) - +- ^ FilterExecTransformer (48) - +- ^ Scan parquet (47) + VeloxColumnarToRowExec (61) + +- ^ RegularHashAggregateExecTransformer (59) + +- ^ InputIteratorTransformer (58) + +- ShuffleQueryStage (56), Statistics(X) + +- ColumnarExchange (55) + +- ^ FlushableHashAggregateExecTransformer (53) + +- ^ ProjectExecTransformer (52) + +- ^ FilterExecTransformer (51) + +- ^ Scan parquet (50) +- == Initial Plan == - HashAggregate (64) - +- Exchange (63) - +- HashAggregate (62) - +- Project (61) - +- Filter (60) - +- Scan parquet (59) \ No newline at end of file + HashAggregate (67) + +- Exchange (66) + +- HashAggregate (65) + +- Project (64) + +- Filter (63) + +- Scan parquet (62) \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/3.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/3.txt index eebd274d4709..72ee625bc1ba 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/3.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/3.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (59) +AdaptiveSparkPlan (65) +- == Final Plan == VeloxColumnarToRowExec (39) +- TakeOrderedAndProjectExecTransformer (38) @@ -32,25 +32,31 @@ AdaptiveSparkPlan (59) +- ^ FilterExecTransformer (25) +- ^ Scan parquet (24) +- == Initial Plan == - TakeOrderedAndProject (58) - +- HashAggregate (57) - +- HashAggregate (56) - +- Project (55) - +- ShuffledHashJoin Inner BuildRight (54) - :- Exchange (49) - : +- Project (48) - : +- ShuffledHashJoin Inner BuildLeft (47) - : :- Exchange (43) - : : +- Project (42) - : : +- Filter (41) - : : +- Scan parquet (40) - : +- Exchange (46) - : +- Filter (45) - : +- Scan parquet (44) - +- Exchange (53) - +- Project (52) - +- Filter (51) - +- Scan parquet (50) + TakeOrderedAndProject (64) + +- HashAggregate (63) + +- HashAggregate (62) + +- Project (61) + +- ShuffledHashJoinExecTemp (60) + +- SortMergeJoin Inner (59) + :- Sort (53) + : +- Exchange (52) + : +- Project (51) + : +- ShuffledHashJoinExecTemp (50) + : +- SortMergeJoin Inner (49) + : :- Sort (44) + : : +- Exchange (43) + : : +- Project (42) + : : +- Filter (41) + : : +- Scan parquet (40) + : +- Sort (48) + : +- Exchange (47) + : +- Filter (46) + : +- Scan parquet (45) + +- Sort (58) + +- Exchange (57) + +- Project (56) + +- Filter (55) + +- Scan parquet (54) (1) Scan parquet @@ -236,80 +242,104 @@ Input [2]: [c_custkey#X, c_mktsegment#X] Input [1]: [c_custkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(44) Scan parquet +(44) Sort +Input [1]: [c_custkey#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(45) Scan parquet Output [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderdate), LessThan(o_orderdate,1995-03-15), IsNotNull(o_custkey), IsNotNull(o_orderkey)] ReadSchema: struct -(45) Filter +(46) Filter Input [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Condition : (((isnotnull(o_orderdate#X) AND (o_orderdate#X < 1995-03-15)) AND isnotnull(o_custkey#X)) AND isnotnull(o_orderkey#X)) -(46) Exchange +(47) Exchange Input [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(47) ShuffledHashJoin +(48) Sort +Input [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(49) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join condition: None -(48) Project +(50) ShuffledHashJoinExecTemp +Input [5]: [c_custkey#X, o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] +Arguments: BuildLeft + +(51) Project Output [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] Input [5]: [c_custkey#X, o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] -(49) Exchange +(52) Exchange Input [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(50) Scan parquet +(53) Sort +Input [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(54) Scan parquet Output [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThan(l_shipdate,1995-03-15), IsNotNull(l_orderkey)] ReadSchema: struct -(51) Filter +(55) Filter Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : ((isnotnull(l_shipdate#X) AND (l_shipdate#X > 1995-03-15)) AND isnotnull(l_orderkey#X)) -(52) Project +(56) Project Output [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(53) Exchange +(57) Exchange Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(54) ShuffledHashJoin +(58) Sort +Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(59) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(55) Project +(60) ShuffledHashJoinExecTemp +Input [6]: [o_orderkey#X, o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] +Arguments: BuildRight + +(61) Project Output [5]: [o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] Input [6]: [o_orderkey#X, o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] -(56) HashAggregate +(62) HashAggregate Input [5]: [o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] Keys [3]: [l_orderkey#X, o_orderdate#X, o_shippriority#X] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [5]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] -(57) HashAggregate +(63) HashAggregate Input [5]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] Keys [3]: [l_orderkey#X, o_orderdate#X, o_shippriority#X] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] Results [4]: [l_orderkey#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X AS revenue#X, o_orderdate#X, o_shippriority#X] -(58) TakeOrderedAndProject +(64) TakeOrderedAndProject Input [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] Arguments: X, [revenue#X DESC NULLS LAST, o_orderdate#X ASC NULLS FIRST], [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] -(59) AdaptiveSparkPlan +(65) AdaptiveSparkPlan Output [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/4.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/4.txt index f05c9a5378c6..150f4d4db524 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/4.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/4.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (50) +AdaptiveSparkPlan (53) +- == Final Plan == VeloxColumnarToRowExec (34) +- ^ SortExecTransformer (32) @@ -27,21 +27,24 @@ AdaptiveSparkPlan (50) +- ^ FilterExecTransformer (10) +- ^ Scan parquet (9) +- == Initial Plan == - Sort (49) - +- Exchange (48) - +- HashAggregate (47) - +- Exchange (46) - +- HashAggregate (45) - +- Project (44) - +- ShuffledHashJoin LeftSemi BuildRight (43) - :- Exchange (38) - : +- Project (37) - : +- Filter (36) - : +- Scan parquet (35) - +- Exchange (42) - +- Project (41) - +- Filter (40) - +- Scan parquet (39) + Sort (52) + +- Exchange (51) + +- HashAggregate (50) + +- Exchange (49) + +- HashAggregate (48) + +- Project (47) + +- ShuffledHashJoinExecTemp (46) + +- SortMergeJoin LeftSemi (45) + :- Sort (39) + : +- Exchange (38) + : +- Project (37) + : +- Filter (36) + : +- Scan parquet (35) + +- Sort (44) + +- Exchange (43) + +- Project (42) + +- Filter (41) + +- Scan parquet (40) (1) Scan parquet @@ -203,60 +206,72 @@ Input [3]: [o_orderkey#X, o_orderdate#X, o_orderpriority#X] Input [2]: [o_orderkey#X, o_orderpriority#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(39) Scan parquet +(39) Sort +Input [2]: [o_orderkey#X, o_orderpriority#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(40) Scan parquet Output [3]: [l_orderkey#X, l_commitdate#X, l_receiptdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_commitdate), IsNotNull(l_receiptdate)] ReadSchema: struct -(40) Filter +(41) Filter Input [3]: [l_orderkey#X, l_commitdate#X, l_receiptdate#X] Condition : ((isnotnull(l_commitdate#X) AND isnotnull(l_receiptdate#X)) AND (l_commitdate#X < l_receiptdate#X)) -(41) Project +(42) Project Output [1]: [l_orderkey#X] Input [3]: [l_orderkey#X, l_commitdate#X, l_receiptdate#X] -(42) Exchange +(43) Exchange Input [1]: [l_orderkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(43) ShuffledHashJoin +(44) Sort +Input [1]: [l_orderkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(45) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(44) Project +(46) ShuffledHashJoinExecTemp +Input [2]: [o_orderkey#X, o_orderpriority#X] +Arguments: BuildRight + +(47) Project Output [1]: [o_orderpriority#X] Input [2]: [o_orderkey#X, o_orderpriority#X] -(45) HashAggregate +(48) HashAggregate Input [1]: [o_orderpriority#X] Keys [1]: [o_orderpriority#X] Functions [1]: [partial_count(1)] Aggregate Attributes [1]: [count#X] Results [2]: [o_orderpriority#X, count#X] -(46) Exchange +(49) Exchange Input [2]: [o_orderpriority#X, count#X] Arguments: hashpartitioning(o_orderpriority#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(47) HashAggregate +(50) HashAggregate Input [2]: [o_orderpriority#X, count#X] Keys [1]: [o_orderpriority#X] Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#X] Results [2]: [o_orderpriority#X, count(1)#X AS order_count#X] -(48) Exchange +(51) Exchange Input [2]: [o_orderpriority#X, order_count#X] Arguments: rangepartitioning(o_orderpriority#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(49) Sort +(52) Sort Input [2]: [o_orderpriority#X, order_count#X] Arguments: [o_orderpriority#X ASC NULLS FIRST], true, 0 -(50) AdaptiveSparkPlan +(53) AdaptiveSparkPlan Output [2]: [o_orderpriority#X, order_count#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/5.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/5.txt index 027ea4a926d2..4520dfdbddf3 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/5.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/5.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (134) +AdaptiveSparkPlan (149) +- == Final Plan == VeloxColumnarToRowExec (94) +- ^ SortExecTransformer (92) @@ -71,45 +71,60 @@ AdaptiveSparkPlan (134) +- ^ FilterExecTransformer (70) +- ^ Scan parquet (69) +- == Initial Plan == - Sort (133) - +- Exchange (132) - +- HashAggregate (131) - +- Exchange (130) - +- HashAggregate (129) - +- Project (128) - +- ShuffledHashJoin Inner BuildRight (127) - :- Exchange (122) - : +- Project (121) - : +- ShuffledHashJoin Inner BuildRight (120) - : :- Exchange (116) - : : +- Project (115) - : : +- ShuffledHashJoin Inner BuildRight (114) - : : :- Exchange (110) - : : : +- Project (109) - : : : +- ShuffledHashJoin Inner BuildRight (108) - : : : :- Exchange (104) - : : : : +- Project (103) - : : : : +- ShuffledHashJoin Inner BuildLeft (102) - : : : : :- Exchange (97) - : : : : : +- Filter (96) - : : : : : +- Scan parquet (95) - : : : : +- Exchange (101) - : : : : +- Project (100) - : : : : +- Filter (99) - : : : : +- Scan parquet (98) - : : : +- Exchange (107) - : : : +- Filter (106) - : : : +- Scan parquet (105) - : : +- Exchange (113) - : : +- Filter (112) - : : +- Scan parquet (111) - : +- Exchange (119) - : +- Filter (118) - : +- Scan parquet (117) - +- Exchange (126) - +- Project (125) - +- Filter (124) - +- Scan parquet (123) + Sort (148) + +- Exchange (147) + +- HashAggregate (146) + +- Exchange (145) + +- HashAggregate (144) + +- Project (143) + +- ShuffledHashJoinExecTemp (142) + +- SortMergeJoin Inner (141) + :- Sort (135) + : +- Exchange (134) + : +- Project (133) + : +- ShuffledHashJoinExecTemp (132) + : +- SortMergeJoin Inner (131) + : :- Sort (126) + : : +- Exchange (125) + : : +- Project (124) + : : +- ShuffledHashJoinExecTemp (123) + : : +- SortMergeJoin Inner (122) + : : :- Sort (117) + : : : +- Exchange (116) + : : : +- Project (115) + : : : +- ShuffledHashJoinExecTemp (114) + : : : +- SortMergeJoin Inner (113) + : : : :- Sort (108) + : : : : +- Exchange (107) + : : : : +- Project (106) + : : : : +- ShuffledHashJoinExecTemp (105) + : : : : +- SortMergeJoin Inner (104) + : : : : :- Sort (98) + : : : : : +- Exchange (97) + : : : : : +- Filter (96) + : : : : : +- Scan parquet (95) + : : : : +- Sort (103) + : : : : +- Exchange (102) + : : : : +- Project (101) + : : : : +- Filter (100) + : : : : +- Scan parquet (99) + : : : +- Sort (112) + : : : +- Exchange (111) + : : : +- Filter (110) + : : : +- Scan parquet (109) + : : +- Sort (121) + : : +- Exchange (120) + : : +- Filter (119) + : : +- Scan parquet (118) + : +- Sort (130) + : +- Exchange (129) + : +- Filter (128) + : +- Scan parquet (127) + +- Sort (140) + +- Exchange (139) + +- Project (138) + +- Filter (137) + +- Scan parquet (136) (1) Scan parquet @@ -507,176 +522,236 @@ Condition : (isnotnull(c_custkey#X) AND isnotnull(c_nationkey#X)) Input [2]: [c_custkey#X, c_nationkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(98) Scan parquet +(98) Sort +Input [2]: [c_custkey#X, c_nationkey#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(99) Scan parquet Output [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderdate), GreaterThanOrEqual(o_orderdate,1994-01-01), LessThan(o_orderdate,1995-01-01), IsNotNull(o_custkey), IsNotNull(o_orderkey)] ReadSchema: struct -(99) Filter +(100) Filter Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Condition : ((((isnotnull(o_orderdate#X) AND (o_orderdate#X >= 1994-01-01)) AND (o_orderdate#X < 1995-01-01)) AND isnotnull(o_custkey#X)) AND isnotnull(o_orderkey#X)) -(100) Project +(101) Project Output [2]: [o_orderkey#X, o_custkey#X] Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] -(101) Exchange +(102) Exchange Input [2]: [o_orderkey#X, o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(102) ShuffledHashJoin +(103) Sort +Input [2]: [o_orderkey#X, o_custkey#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(104) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join condition: None -(103) Project +(105) ShuffledHashJoinExecTemp +Input [4]: [c_custkey#X, c_nationkey#X, o_orderkey#X, o_custkey#X] +Arguments: BuildLeft + +(106) Project Output [2]: [c_nationkey#X, o_orderkey#X] Input [4]: [c_custkey#X, c_nationkey#X, o_orderkey#X, o_custkey#X] -(104) Exchange +(107) Exchange Input [2]: [c_nationkey#X, o_orderkey#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(105) Scan parquet +(108) Sort +Input [2]: [c_nationkey#X, o_orderkey#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(109) Scan parquet Output [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_orderkey), IsNotNull(l_suppkey)] ReadSchema: struct -(106) Filter +(110) Filter Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Condition : (isnotnull(l_orderkey#X) AND isnotnull(l_suppkey#X)) -(107) Exchange +(111) Exchange Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(108) ShuffledHashJoin +(112) Sort +Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(113) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(109) Project +(114) ShuffledHashJoinExecTemp +Input [6]: [c_nationkey#X, o_orderkey#X, l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: BuildRight + +(115) Project Output [4]: [c_nationkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Input [6]: [c_nationkey#X, o_orderkey#X, l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] -(110) Exchange +(116) Exchange Input [4]: [c_nationkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_suppkey#X, c_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(111) Scan parquet +(117) Sort +Input [4]: [c_nationkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_suppkey#X ASC NULLS FIRST, c_nationkey#X ASC NULLS FIRST], false, 0 + +(118) Scan parquet Output [2]: [s_suppkey#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct -(112) Filter +(119) Filter Input [2]: [s_suppkey#X, s_nationkey#X] Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) -(113) Exchange +(120) Exchange Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(114) ShuffledHashJoin +(121) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST, s_nationkey#X ASC NULLS FIRST], false, 0 + +(122) SortMergeJoin Left keys [2]: [l_suppkey#X, c_nationkey#X] Right keys [2]: [s_suppkey#X, s_nationkey#X] Join condition: None -(115) Project +(123) ShuffledHashJoinExecTemp +Input [6]: [c_nationkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, s_suppkey#X, s_nationkey#X] +Arguments: BuildRight + +(124) Project Output [3]: [l_extendedprice#X, l_discount#X, s_nationkey#X] Input [6]: [c_nationkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, s_suppkey#X, s_nationkey#X] -(116) Exchange +(125) Exchange Input [3]: [l_extendedprice#X, l_discount#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(117) Scan parquet +(126) Sort +Input [3]: [l_extendedprice#X, l_discount#X, s_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(127) Scan parquet Output [3]: [n_nationkey#X, n_name#X, n_regionkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey), IsNotNull(n_regionkey)] ReadSchema: struct -(118) Filter +(128) Filter Input [3]: [n_nationkey#X, n_name#X, n_regionkey#X] Condition : (isnotnull(n_nationkey#X) AND isnotnull(n_regionkey#X)) -(119) Exchange +(129) Exchange Input [3]: [n_nationkey#X, n_name#X, n_regionkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(120) ShuffledHashJoin +(130) Sort +Input [3]: [n_nationkey#X, n_name#X, n_regionkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(131) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(121) Project +(132) ShuffledHashJoinExecTemp +Input [6]: [l_extendedprice#X, l_discount#X, s_nationkey#X, n_nationkey#X, n_name#X, n_regionkey#X] +Arguments: BuildRight + +(133) Project Output [4]: [l_extendedprice#X, l_discount#X, n_name#X, n_regionkey#X] Input [6]: [l_extendedprice#X, l_discount#X, s_nationkey#X, n_nationkey#X, n_name#X, n_regionkey#X] -(122) Exchange +(134) Exchange Input [4]: [l_extendedprice#X, l_discount#X, n_name#X, n_regionkey#X] Arguments: hashpartitioning(n_regionkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(123) Scan parquet +(135) Sort +Input [4]: [l_extendedprice#X, l_discount#X, n_name#X, n_regionkey#X] +Arguments: [n_regionkey#X ASC NULLS FIRST], false, 0 + +(136) Scan parquet Output [2]: [r_regionkey#X, r_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(r_name), EqualTo(r_name,ASIA), IsNotNull(r_regionkey)] ReadSchema: struct -(124) Filter +(137) Filter Input [2]: [r_regionkey#X, r_name#X] Condition : ((isnotnull(r_name#X) AND (r_name#X = ASIA)) AND isnotnull(r_regionkey#X)) -(125) Project +(138) Project Output [1]: [r_regionkey#X] Input [2]: [r_regionkey#X, r_name#X] -(126) Exchange +(139) Exchange Input [1]: [r_regionkey#X] Arguments: hashpartitioning(r_regionkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(127) ShuffledHashJoin +(140) Sort +Input [1]: [r_regionkey#X] +Arguments: [r_regionkey#X ASC NULLS FIRST], false, 0 + +(141) SortMergeJoin Left keys [1]: [n_regionkey#X] Right keys [1]: [r_regionkey#X] Join condition: None -(128) Project +(142) ShuffledHashJoinExecTemp +Input [5]: [l_extendedprice#X, l_discount#X, n_name#X, n_regionkey#X, r_regionkey#X] +Arguments: BuildRight + +(143) Project Output [3]: [l_extendedprice#X, l_discount#X, n_name#X] Input [5]: [l_extendedprice#X, l_discount#X, n_name#X, n_regionkey#X, r_regionkey#X] -(129) HashAggregate +(144) HashAggregate Input [3]: [l_extendedprice#X, l_discount#X, n_name#X] Keys [1]: [n_name#X] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [n_name#X, sum#X, isEmpty#X] -(130) Exchange +(145) Exchange Input [3]: [n_name#X, sum#X, isEmpty#X] Arguments: hashpartitioning(n_name#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(131) HashAggregate +(146) HashAggregate Input [3]: [n_name#X, sum#X, isEmpty#X] Keys [1]: [n_name#X] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] Results [2]: [n_name#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X AS revenue#X] -(132) Exchange +(147) Exchange Input [2]: [n_name#X, revenue#X] Arguments: rangepartitioning(revenue#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(133) Sort +(148) Sort Input [2]: [n_name#X, revenue#X] Arguments: [revenue#X DESC NULLS LAST], true, 0 -(134) AdaptiveSparkPlan +(149) AdaptiveSparkPlan Output [2]: [n_name#X, revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/7.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/7.txt index eb5979fb5d84..5a5da6de8b6f 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/7.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/7.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (128) +AdaptiveSparkPlan (143) +- == Final Plan == VeloxColumnarToRowExec (90) +- ^ SortExecTransformer (88) @@ -68,43 +68,58 @@ AdaptiveSparkPlan (128) +- ShuffleQueryStage (70), Statistics(X) +- ReusedExchange (69) +- == Initial Plan == - Sort (127) - +- Exchange (126) - +- HashAggregate (125) - +- Exchange (124) - +- HashAggregate (123) - +- Project (122) - +- ShuffledHashJoin Inner BuildRight (121) - :- Exchange (117) - : +- Project (116) - : +- ShuffledHashJoin Inner BuildRight (115) - : :- Exchange (111) - : : +- Project (110) - : : +- ShuffledHashJoin Inner BuildRight (109) - : : :- Exchange (105) - : : : +- Project (104) - : : : +- ShuffledHashJoin Inner BuildRight (103) - : : : :- Exchange (99) - : : : : +- Project (98) - : : : : +- ShuffledHashJoin Inner BuildLeft (97) - : : : : :- Exchange (93) - : : : : : +- Filter (92) - : : : : : +- Scan parquet (91) - : : : : +- Exchange (96) - : : : : +- Filter (95) - : : : : +- Scan parquet (94) - : : : +- Exchange (102) - : : : +- Filter (101) - : : : +- Scan parquet (100) - : : +- Exchange (108) - : : +- Filter (107) - : : +- Scan parquet (106) - : +- Exchange (114) - : +- Filter (113) - : +- Scan parquet (112) - +- Exchange (120) - +- Filter (119) - +- Scan parquet (118) + Sort (142) + +- Exchange (141) + +- HashAggregate (140) + +- Exchange (139) + +- HashAggregate (138) + +- Project (137) + +- ShuffledHashJoinExecTemp (136) + +- SortMergeJoin Inner (135) + :- Sort (130) + : +- Exchange (129) + : +- Project (128) + : +- ShuffledHashJoinExecTemp (127) + : +- SortMergeJoin Inner (126) + : :- Sort (121) + : : +- Exchange (120) + : : +- Project (119) + : : +- ShuffledHashJoinExecTemp (118) + : : +- SortMergeJoin Inner (117) + : : :- Sort (112) + : : : +- Exchange (111) + : : : +- Project (110) + : : : +- ShuffledHashJoinExecTemp (109) + : : : +- SortMergeJoin Inner (108) + : : : :- Sort (103) + : : : : +- Exchange (102) + : : : : +- Project (101) + : : : : +- ShuffledHashJoinExecTemp (100) + : : : : +- SortMergeJoin Inner (99) + : : : : :- Sort (94) + : : : : : +- Exchange (93) + : : : : : +- Filter (92) + : : : : : +- Scan parquet (91) + : : : : +- Sort (98) + : : : : +- Exchange (97) + : : : : +- Filter (96) + : : : : +- Scan parquet (95) + : : : +- Sort (107) + : : : +- Exchange (106) + : : : +- Filter (105) + : : : +- Scan parquet (104) + : : +- Sort (116) + : : +- Exchange (115) + : : +- Filter (114) + : : +- Scan parquet (113) + : +- Sort (125) + : +- Exchange (124) + : +- Filter (123) + : +- Scan parquet (122) + +- Sort (134) + +- Exchange (133) + +- Filter (132) + +- Scan parquet (131) (1) Scan parquet @@ -482,168 +497,228 @@ Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(94) Scan parquet +(94) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(95) Scan parquet Output [5]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1995-01-01), LessThanOrEqual(l_shipdate,1996-12-31), IsNotNull(l_suppkey), IsNotNull(l_orderkey)] ReadSchema: struct -(95) Filter +(96) Filter Input [5]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : ((((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1995-01-01)) AND (l_shipdate#X <= 1996-12-31)) AND isnotnull(l_suppkey#X)) AND isnotnull(l_orderkey#X)) -(96) Exchange +(97) Exchange Input [5]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(97) ShuffledHashJoin +(98) Sort +Input [5]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Arguments: [l_suppkey#X ASC NULLS FIRST], false, 0 + +(99) SortMergeJoin Left keys [1]: [s_suppkey#X] Right keys [1]: [l_suppkey#X] Join condition: None -(98) Project +(100) ShuffledHashJoinExecTemp +Input [7]: [s_suppkey#X, s_nationkey#X, l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Arguments: BuildLeft + +(101) Project Output [5]: [s_nationkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Input [7]: [s_suppkey#X, s_nationkey#X, l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(99) Exchange +(102) Exchange Input [5]: [s_nationkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(100) Scan parquet +(103) Sort +Input [5]: [s_nationkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(104) Scan parquet Output [2]: [o_orderkey#X, o_custkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderkey), IsNotNull(o_custkey)] ReadSchema: struct -(101) Filter +(105) Filter Input [2]: [o_orderkey#X, o_custkey#X] Condition : (isnotnull(o_orderkey#X) AND isnotnull(o_custkey#X)) -(102) Exchange +(106) Exchange Input [2]: [o_orderkey#X, o_custkey#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(103) ShuffledHashJoin +(107) Sort +Input [2]: [o_orderkey#X, o_custkey#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(108) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [o_orderkey#X] Join condition: None -(104) Project +(109) ShuffledHashJoinExecTemp +Input [7]: [s_nationkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_orderkey#X, o_custkey#X] +Arguments: BuildRight + +(110) Project Output [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_custkey#X] Input [7]: [s_nationkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_orderkey#X, o_custkey#X] -(105) Exchange +(111) Exchange Input [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(106) Scan parquet +(112) Sort +Input [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_custkey#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(113) Scan parquet Output [2]: [c_custkey#X, c_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(c_custkey), IsNotNull(c_nationkey)] ReadSchema: struct -(107) Filter +(114) Filter Input [2]: [c_custkey#X, c_nationkey#X] Condition : (isnotnull(c_custkey#X) AND isnotnull(c_nationkey#X)) -(108) Exchange +(115) Exchange Input [2]: [c_custkey#X, c_nationkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(109) ShuffledHashJoin +(116) Sort +Input [2]: [c_custkey#X, c_nationkey#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(117) SortMergeJoin Left keys [1]: [o_custkey#X] Right keys [1]: [c_custkey#X] Join condition: None -(110) Project +(118) ShuffledHashJoinExecTemp +Input [7]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_custkey#X, c_custkey#X, c_nationkey#X] +Arguments: BuildRight + +(119) Project Output [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X] Input [7]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_custkey#X, c_custkey#X, c_nationkey#X] -(111) Exchange +(120) Exchange Input [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(112) Scan parquet +(121) Sort +Input [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(122) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey), Or(EqualTo(n_name,FRANCE),EqualTo(n_name,GERMANY))] ReadSchema: struct -(113) Filter +(123) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : (isnotnull(n_nationkey#X) AND ((n_name#X = FRANCE) OR (n_name#X = GERMANY))) -(114) Exchange +(124) Exchange Input [2]: [n_nationkey#X, n_name#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(115) ShuffledHashJoin +(125) Sort +Input [2]: [n_nationkey#X, n_name#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(126) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(116) Project +(127) ShuffledHashJoinExecTemp +Input [7]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_nationkey#X, n_name#X] +Arguments: BuildRight + +(128) Project Output [5]: [l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_name#X] Input [7]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_nationkey#X, n_name#X] -(117) Exchange +(129) Exchange Input [5]: [l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_name#X] Arguments: hashpartitioning(c_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(118) Scan parquet +(130) Sort +Input [5]: [l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_name#X] +Arguments: [c_nationkey#X ASC NULLS FIRST], false, 0 + +(131) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey), Or(EqualTo(n_name,GERMANY),EqualTo(n_name,FRANCE))] ReadSchema: struct -(119) Filter +(132) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : (isnotnull(n_nationkey#X) AND ((n_name#X = GERMANY) OR (n_name#X = FRANCE))) -(120) Exchange +(133) Exchange Input [2]: [n_nationkey#X, n_name#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(121) ShuffledHashJoin +(134) Sort +Input [2]: [n_nationkey#X, n_name#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(135) SortMergeJoin Left keys [1]: [c_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: (((n_name#X = FRANCE) AND (n_name#X = GERMANY)) OR ((n_name#X = GERMANY) AND (n_name#X = FRANCE))) -(122) Project +(136) ShuffledHashJoinExecTemp +Input [7]: [l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_name#X, n_nationkey#X, n_name#X] +Arguments: BuildRight + +(137) Project Output [4]: [n_name#X AS supp_nation#X, n_name#X AS cust_nation#X, year(l_shipdate#X) AS l_year#X, CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)) AS volume#X] Input [7]: [l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_name#X, n_nationkey#X, n_name#X] -(123) HashAggregate +(138) HashAggregate Input [4]: [supp_nation#X, cust_nation#X, l_year#X, volume#X] Keys [3]: [supp_nation#X, cust_nation#X, l_year#X] Functions [1]: [partial_sum(volume#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [5]: [supp_nation#X, cust_nation#X, l_year#X, sum#X, isEmpty#X] -(124) Exchange +(139) Exchange Input [5]: [supp_nation#X, cust_nation#X, l_year#X, sum#X, isEmpty#X] Arguments: hashpartitioning(supp_nation#X, cust_nation#X, l_year#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(125) HashAggregate +(140) HashAggregate Input [5]: [supp_nation#X, cust_nation#X, l_year#X, sum#X, isEmpty#X] Keys [3]: [supp_nation#X, cust_nation#X, l_year#X] Functions [1]: [sum(volume#X)] Aggregate Attributes [1]: [sum(volume#X)#X] Results [4]: [supp_nation#X, cust_nation#X, l_year#X, sum(volume#X)#X AS revenue#X] -(126) Exchange +(141) Exchange Input [4]: [supp_nation#X, cust_nation#X, l_year#X, revenue#X] Arguments: rangepartitioning(supp_nation#X ASC NULLS FIRST, cust_nation#X ASC NULLS FIRST, l_year#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(127) Sort +(142) Sort Input [4]: [supp_nation#X, cust_nation#X, l_year#X, revenue#X] Arguments: [supp_nation#X ASC NULLS FIRST, cust_nation#X ASC NULLS FIRST, l_year#X ASC NULLS FIRST], true, 0 -(128) AdaptiveSparkPlan +(143) AdaptiveSparkPlan Output [4]: [supp_nation#X, cust_nation#X, l_year#X, revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/8.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/8.txt index 98bba133502c..5e8f9e9d30a1 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/8.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/8.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (177) +AdaptiveSparkPlan (198) +- == Final Plan == VeloxColumnarToRowExec (125) +- ^ SortExecTransformer (123) @@ -94,57 +94,78 @@ AdaptiveSparkPlan (177) +- ^ FilterExecTransformer (100) +- ^ Scan parquet (99) +- == Initial Plan == - Sort (176) - +- Exchange (175) - +- HashAggregate (174) - +- Exchange (173) - +- HashAggregate (172) - +- Project (171) - +- ShuffledHashJoin Inner BuildRight (170) - :- Exchange (165) - : +- Project (164) - : +- ShuffledHashJoin Inner BuildRight (163) - : :- Exchange (159) - : : +- Project (158) - : : +- ShuffledHashJoin Inner BuildRight (157) - : : :- Exchange (153) - : : : +- Project (152) - : : : +- ShuffledHashJoin Inner BuildRight (151) - : : : :- Exchange (147) - : : : : +- Project (146) - : : : : +- ShuffledHashJoin Inner BuildRight (145) - : : : : :- Exchange (141) - : : : : : +- Project (140) - : : : : : +- ShuffledHashJoin Inner BuildRight (139) - : : : : : :- Exchange (135) - : : : : : : +- Project (134) - : : : : : : +- ShuffledHashJoin Inner BuildLeft (133) - : : : : : : :- Exchange (129) - : : : : : : : +- Project (128) - : : : : : : : +- Filter (127) - : : : : : : : +- Scan parquet (126) - : : : : : : +- Exchange (132) - : : : : : : +- Filter (131) - : : : : : : +- Scan parquet (130) - : : : : : +- Exchange (138) - : : : : : +- Filter (137) - : : : : : +- Scan parquet (136) - : : : : +- Exchange (144) - : : : : +- Filter (143) - : : : : +- Scan parquet (142) - : : : +- Exchange (150) - : : : +- Filter (149) - : : : +- Scan parquet (148) - : : +- Exchange (156) - : : +- Filter (155) - : : +- Scan parquet (154) - : +- Exchange (162) - : +- Filter (161) - : +- Scan parquet (160) - +- Exchange (169) - +- Project (168) - +- Filter (167) - +- Scan parquet (166) + Sort (197) + +- Exchange (196) + +- HashAggregate (195) + +- Exchange (194) + +- HashAggregate (193) + +- Project (192) + +- ShuffledHashJoinExecTemp (191) + +- SortMergeJoin Inner (190) + :- Sort (184) + : +- Exchange (183) + : +- Project (182) + : +- ShuffledHashJoinExecTemp (181) + : +- SortMergeJoin Inner (180) + : :- Sort (175) + : : +- Exchange (174) + : : +- Project (173) + : : +- ShuffledHashJoinExecTemp (172) + : : +- SortMergeJoin Inner (171) + : : :- Sort (166) + : : : +- Exchange (165) + : : : +- Project (164) + : : : +- ShuffledHashJoinExecTemp (163) + : : : +- SortMergeJoin Inner (162) + : : : :- Sort (157) + : : : : +- Exchange (156) + : : : : +- Project (155) + : : : : +- ShuffledHashJoinExecTemp (154) + : : : : +- SortMergeJoin Inner (153) + : : : : :- Sort (148) + : : : : : +- Exchange (147) + : : : : : +- Project (146) + : : : : : +- ShuffledHashJoinExecTemp (145) + : : : : : +- SortMergeJoin Inner (144) + : : : : : :- Sort (139) + : : : : : : +- Exchange (138) + : : : : : : +- Project (137) + : : : : : : +- ShuffledHashJoinExecTemp (136) + : : : : : : +- SortMergeJoin Inner (135) + : : : : : : :- Sort (130) + : : : : : : : +- Exchange (129) + : : : : : : : +- Project (128) + : : : : : : : +- Filter (127) + : : : : : : : +- Scan parquet (126) + : : : : : : +- Sort (134) + : : : : : : +- Exchange (133) + : : : : : : +- Filter (132) + : : : : : : +- Scan parquet (131) + : : : : : +- Sort (143) + : : : : : +- Exchange (142) + : : : : : +- Filter (141) + : : : : : +- Scan parquet (140) + : : : : +- Sort (152) + : : : : +- Exchange (151) + : : : : +- Filter (150) + : : : : +- Scan parquet (149) + : : : +- Sort (161) + : : : +- Exchange (160) + : : : +- Filter (159) + : : : +- Scan parquet (158) + : : +- Sort (170) + : : +- Exchange (169) + : : +- Filter (168) + : : +- Scan parquet (167) + : +- Sort (179) + : +- Exchange (178) + : +- Filter (177) + : +- Scan parquet (176) + +- Sort (189) + +- Exchange (188) + +- Project (187) + +- Filter (186) + +- Scan parquet (185) (1) Scan parquet @@ -670,228 +691,312 @@ Input [2]: [p_partkey#X, p_type#X] Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(130) Scan parquet +(130) Sort +Input [1]: [p_partkey#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(131) Scan parquet Output [5]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_partkey), IsNotNull(l_suppkey), IsNotNull(l_orderkey)] ReadSchema: struct -(131) Filter +(132) Filter Input [5]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Condition : ((isnotnull(l_partkey#X) AND isnotnull(l_suppkey#X)) AND isnotnull(l_orderkey#X)) -(132) Exchange +(133) Exchange Input [5]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(133) ShuffledHashJoin +(134) Sort +Input [5]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(135) SortMergeJoin Left keys [1]: [p_partkey#X] Right keys [1]: [l_partkey#X] Join condition: None -(134) Project +(136) ShuffledHashJoinExecTemp +Input [6]: [p_partkey#X, l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: BuildLeft + +(137) Project Output [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Input [6]: [p_partkey#X, l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] -(135) Exchange +(138) Exchange Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(136) Scan parquet +(139) Sort +Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_suppkey#X ASC NULLS FIRST], false, 0 + +(140) Scan parquet Output [2]: [s_suppkey#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct -(137) Filter +(141) Filter Input [2]: [s_suppkey#X, s_nationkey#X] Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) -(138) Exchange +(142) Exchange Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(139) ShuffledHashJoin +(143) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(144) SortMergeJoin Left keys [1]: [l_suppkey#X] Right keys [1]: [s_suppkey#X] Join condition: None -(140) Project +(145) ShuffledHashJoinExecTemp +Input [6]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, s_suppkey#X, s_nationkey#X] +Arguments: BuildRight + +(146) Project Output [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, s_nationkey#X] Input [6]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, s_suppkey#X, s_nationkey#X] -(141) Exchange +(147) Exchange Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, s_nationkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(142) Scan parquet +(148) Sort +Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, s_nationkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(149) Scan parquet Output [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderdate), GreaterThanOrEqual(o_orderdate,1995-01-01), LessThanOrEqual(o_orderdate,1996-12-31), IsNotNull(o_orderkey), IsNotNull(o_custkey)] ReadSchema: struct -(143) Filter +(150) Filter Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Condition : ((((isnotnull(o_orderdate#X) AND (o_orderdate#X >= 1995-01-01)) AND (o_orderdate#X <= 1996-12-31)) AND isnotnull(o_orderkey#X)) AND isnotnull(o_custkey#X)) -(144) Exchange +(151) Exchange Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(145) ShuffledHashJoin +(152) Sort +Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(153) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [o_orderkey#X] Join condition: None -(146) Project +(154) ShuffledHashJoinExecTemp +Input [7]: [l_orderkey#X, l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderkey#X, o_custkey#X, o_orderdate#X] +Arguments: BuildRight + +(155) Project Output [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_custkey#X, o_orderdate#X] Input [7]: [l_orderkey#X, l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderkey#X, o_custkey#X, o_orderdate#X] -(147) Exchange +(156) Exchange Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_custkey#X, o_orderdate#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(148) Scan parquet +(157) Sort +Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_custkey#X, o_orderdate#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(158) Scan parquet Output [2]: [c_custkey#X, c_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(c_custkey), IsNotNull(c_nationkey)] ReadSchema: struct -(149) Filter +(159) Filter Input [2]: [c_custkey#X, c_nationkey#X] Condition : (isnotnull(c_custkey#X) AND isnotnull(c_nationkey#X)) -(150) Exchange +(160) Exchange Input [2]: [c_custkey#X, c_nationkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(151) ShuffledHashJoin +(161) Sort +Input [2]: [c_custkey#X, c_nationkey#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(162) SortMergeJoin Left keys [1]: [o_custkey#X] Right keys [1]: [c_custkey#X] Join condition: None -(152) Project +(163) ShuffledHashJoinExecTemp +Input [7]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_custkey#X, o_orderdate#X, c_custkey#X, c_nationkey#X] +Arguments: BuildRight + +(164) Project Output [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, c_nationkey#X] Input [7]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_custkey#X, o_orderdate#X, c_custkey#X, c_nationkey#X] -(153) Exchange +(165) Exchange Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, c_nationkey#X] Arguments: hashpartitioning(c_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(154) Scan parquet +(166) Sort +Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, c_nationkey#X] +Arguments: [c_nationkey#X ASC NULLS FIRST], false, 0 + +(167) Scan parquet Output [2]: [n_nationkey#X, n_regionkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey), IsNotNull(n_regionkey)] ReadSchema: struct -(155) Filter +(168) Filter Input [2]: [n_nationkey#X, n_regionkey#X] Condition : (isnotnull(n_nationkey#X) AND isnotnull(n_regionkey#X)) -(156) Exchange +(169) Exchange Input [2]: [n_nationkey#X, n_regionkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(157) ShuffledHashJoin +(170) Sort +Input [2]: [n_nationkey#X, n_regionkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(171) SortMergeJoin Left keys [1]: [c_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(158) Project +(172) ShuffledHashJoinExecTemp +Input [7]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, c_nationkey#X, n_nationkey#X, n_regionkey#X] +Arguments: BuildRight + +(173) Project Output [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, n_regionkey#X] Input [7]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, c_nationkey#X, n_nationkey#X, n_regionkey#X] -(159) Exchange +(174) Exchange Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, n_regionkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(160) Scan parquet +(175) Sort +Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, n_regionkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(176) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey)] ReadSchema: struct -(161) Filter +(177) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : isnotnull(n_nationkey#X) -(162) Exchange +(178) Exchange Input [2]: [n_nationkey#X, n_name#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(163) ShuffledHashJoin +(179) Sort +Input [2]: [n_nationkey#X, n_name#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(180) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(164) Project +(181) ShuffledHashJoinExecTemp +Input [7]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, n_regionkey#X, n_nationkey#X, n_name#X] +Arguments: BuildRight + +(182) Project Output [5]: [l_extendedprice#X, l_discount#X, o_orderdate#X, n_regionkey#X, n_name#X] Input [7]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, n_regionkey#X, n_nationkey#X, n_name#X] -(165) Exchange +(183) Exchange Input [5]: [l_extendedprice#X, l_discount#X, o_orderdate#X, n_regionkey#X, n_name#X] Arguments: hashpartitioning(n_regionkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(166) Scan parquet +(184) Sort +Input [5]: [l_extendedprice#X, l_discount#X, o_orderdate#X, n_regionkey#X, n_name#X] +Arguments: [n_regionkey#X ASC NULLS FIRST], false, 0 + +(185) Scan parquet Output [2]: [r_regionkey#X, r_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(r_name), EqualTo(r_name,AMERICA), IsNotNull(r_regionkey)] ReadSchema: struct -(167) Filter +(186) Filter Input [2]: [r_regionkey#X, r_name#X] Condition : ((isnotnull(r_name#X) AND (r_name#X = AMERICA)) AND isnotnull(r_regionkey#X)) -(168) Project +(187) Project Output [1]: [r_regionkey#X] Input [2]: [r_regionkey#X, r_name#X] -(169) Exchange +(188) Exchange Input [1]: [r_regionkey#X] Arguments: hashpartitioning(r_regionkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(170) ShuffledHashJoin +(189) Sort +Input [1]: [r_regionkey#X] +Arguments: [r_regionkey#X ASC NULLS FIRST], false, 0 + +(190) SortMergeJoin Left keys [1]: [n_regionkey#X] Right keys [1]: [r_regionkey#X] Join condition: None -(171) Project +(191) ShuffledHashJoinExecTemp +Input [6]: [l_extendedprice#X, l_discount#X, o_orderdate#X, n_regionkey#X, n_name#X, r_regionkey#X] +Arguments: BuildRight + +(192) Project Output [3]: [year(o_orderdate#X) AS o_year#X, CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)) AS volume#X, n_name#X AS nation#X] Input [6]: [l_extendedprice#X, l_discount#X, o_orderdate#X, n_regionkey#X, n_name#X, r_regionkey#X] -(172) HashAggregate +(193) HashAggregate Input [3]: [o_year#X, volume#X, nation#X] Keys [1]: [o_year#X] Functions [2]: [partial_sum(CASE WHEN (nation#X = BRAZIL) THEN volume#X ELSE 0.0000 END), partial_sum(volume#X)] Aggregate Attributes [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] Results [5]: [o_year#X, sum#X, isEmpty#X, sum#X, isEmpty#X] -(173) Exchange +(194) Exchange Input [5]: [o_year#X, sum#X, isEmpty#X, sum#X, isEmpty#X] Arguments: hashpartitioning(o_year#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(174) HashAggregate +(195) HashAggregate Input [5]: [o_year#X, sum#X, isEmpty#X, sum#X, isEmpty#X] Keys [1]: [o_year#X] Functions [2]: [sum(CASE WHEN (nation#X = BRAZIL) THEN volume#X ELSE 0.0000 END), sum(volume#X)] Aggregate Attributes [2]: [sum(CASE WHEN (nation#X = BRAZIL) THEN volume#X ELSE 0.0000 END)#X, sum(volume#X)#X] Results [2]: [o_year#X, CheckOverflow((promote_precision(sum(CASE WHEN (nation#X = BRAZIL) THEN volume#X ELSE 0.0000 END)#X) / promote_precision(sum(volume#X)#X)), DecimalType(38,6)) AS mkt_share#X] -(175) Exchange +(196) Exchange Input [2]: [o_year#X, mkt_share#X] Arguments: rangepartitioning(o_year#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(176) Sort +(197) Sort Input [2]: [o_year#X, mkt_share#X] Arguments: [o_year#X ASC NULLS FIRST], true, 0 -(177) AdaptiveSparkPlan +(198) AdaptiveSparkPlan Output [2]: [o_year#X, mkt_share#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/9.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/9.txt index 7f3917f80457..eadf9ecff68a 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/9.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/9.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (133) +AdaptiveSparkPlan (148) +- == Final Plan == VeloxColumnarToRowExec (94) +- ^ SortExecTransformer (92) @@ -71,44 +71,59 @@ AdaptiveSparkPlan (133) +- ^ FilterExecTransformer (70) +- ^ Scan parquet (69) +- == Initial Plan == - Sort (132) - +- Exchange (131) - +- HashAggregate (130) - +- Exchange (129) - +- HashAggregate (128) - +- Project (127) - +- ShuffledHashJoin Inner BuildRight (126) - :- Exchange (122) - : +- Project (121) - : +- ShuffledHashJoin Inner BuildRight (120) - : :- Exchange (116) - : : +- Project (115) - : : +- ShuffledHashJoin Inner BuildRight (114) - : : :- Exchange (110) - : : : +- Project (109) - : : : +- ShuffledHashJoin Inner BuildRight (108) - : : : :- Exchange (104) - : : : : +- Project (103) - : : : : +- ShuffledHashJoin Inner BuildLeft (102) - : : : : :- Exchange (98) - : : : : : +- Project (97) - : : : : : +- Filter (96) - : : : : : +- Scan parquet (95) - : : : : +- Exchange (101) - : : : : +- Filter (100) - : : : : +- Scan parquet (99) - : : : +- Exchange (107) - : : : +- Filter (106) - : : : +- Scan parquet (105) - : : +- Exchange (113) - : : +- Filter (112) - : : +- Scan parquet (111) - : +- Exchange (119) - : +- Filter (118) - : +- Scan parquet (117) - +- Exchange (125) - +- Filter (124) - +- Scan parquet (123) + Sort (147) + +- Exchange (146) + +- HashAggregate (145) + +- Exchange (144) + +- HashAggregate (143) + +- Project (142) + +- ShuffledHashJoinExecTemp (141) + +- SortMergeJoin Inner (140) + :- Sort (135) + : +- Exchange (134) + : +- Project (133) + : +- ShuffledHashJoinExecTemp (132) + : +- SortMergeJoin Inner (131) + : :- Sort (126) + : : +- Exchange (125) + : : +- Project (124) + : : +- ShuffledHashJoinExecTemp (123) + : : +- SortMergeJoin Inner (122) + : : :- Sort (117) + : : : +- Exchange (116) + : : : +- Project (115) + : : : +- ShuffledHashJoinExecTemp (114) + : : : +- SortMergeJoin Inner (113) + : : : :- Sort (108) + : : : : +- Exchange (107) + : : : : +- Project (106) + : : : : +- ShuffledHashJoinExecTemp (105) + : : : : +- SortMergeJoin Inner (104) + : : : : :- Sort (99) + : : : : : +- Exchange (98) + : : : : : +- Project (97) + : : : : : +- Filter (96) + : : : : : +- Scan parquet (95) + : : : : +- Sort (103) + : : : : +- Exchange (102) + : : : : +- Filter (101) + : : : : +- Scan parquet (100) + : : : +- Sort (112) + : : : +- Exchange (111) + : : : +- Filter (110) + : : : +- Scan parquet (109) + : : +- Sort (121) + : : +- Exchange (120) + : : +- Filter (119) + : : +- Scan parquet (118) + : +- Sort (130) + : +- Exchange (129) + : +- Filter (128) + : +- Scan parquet (127) + +- Sort (139) + +- Exchange (138) + +- Filter (137) + +- Scan parquet (136) (1) Scan parquet @@ -510,168 +525,228 @@ Input [2]: [p_partkey#X, p_name#X] Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(99) Scan parquet +(99) Sort +Input [1]: [p_partkey#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(100) Scan parquet Output [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_partkey), IsNotNull(l_suppkey), IsNotNull(l_orderkey)] ReadSchema: struct -(100) Filter +(101) Filter Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Condition : ((isnotnull(l_partkey#X) AND isnotnull(l_suppkey#X)) AND isnotnull(l_orderkey#X)) -(101) Exchange +(102) Exchange Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(102) ShuffledHashJoin +(103) Sort +Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(104) SortMergeJoin Left keys [1]: [p_partkey#X] Right keys [1]: [l_partkey#X] Join condition: None -(103) Project +(105) ShuffledHashJoinExecTemp +Input [7]: [p_partkey#X, l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] +Arguments: BuildLeft + +(106) Project Output [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Input [7]: [p_partkey#X, l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] -(104) Exchange +(107) Exchange Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(105) Scan parquet +(108) Sort +Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] +Arguments: [l_suppkey#X ASC NULLS FIRST], false, 0 + +(109) Scan parquet Output [2]: [s_suppkey#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct -(106) Filter +(110) Filter Input [2]: [s_suppkey#X, s_nationkey#X] Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) -(107) Exchange +(111) Exchange Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(108) ShuffledHashJoin +(112) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(113) SortMergeJoin Left keys [1]: [l_suppkey#X] Right keys [1]: [s_suppkey#X] Join condition: None -(109) Project +(114) ShuffledHashJoinExecTemp +Input [8]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_suppkey#X, s_nationkey#X] +Arguments: BuildRight + +(115) Project Output [7]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X] Input [8]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_suppkey#X, s_nationkey#X] -(110) Exchange +(116) Exchange Input [7]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X] Arguments: hashpartitioning(l_suppkey#X, l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(111) Scan parquet +(117) Sort +Input [7]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X] +Arguments: [l_suppkey#X ASC NULLS FIRST, l_partkey#X ASC NULLS FIRST], false, 0 + +(118) Scan parquet Output [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(ps_suppkey), IsNotNull(ps_partkey)] ReadSchema: struct -(112) Filter +(119) Filter Input [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] Condition : (isnotnull(ps_suppkey#X) AND isnotnull(ps_partkey#X)) -(113) Exchange +(120) Exchange Input [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] Arguments: hashpartitioning(ps_suppkey#X, ps_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(114) ShuffledHashJoin +(121) Sort +Input [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] +Arguments: [ps_suppkey#X ASC NULLS FIRST, ps_partkey#X ASC NULLS FIRST], false, 0 + +(122) SortMergeJoin Left keys [2]: [l_suppkey#X, l_partkey#X] Right keys [2]: [ps_suppkey#X, ps_partkey#X] Join condition: None -(115) Project +(123) ShuffledHashJoinExecTemp +Input [10]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_partkey#X, ps_suppkey#X, ps_supplycost#X] +Arguments: BuildRight + +(124) Project Output [6]: [l_orderkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X] Input [10]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_partkey#X, ps_suppkey#X, ps_supplycost#X] -(116) Exchange +(125) Exchange Input [6]: [l_orderkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(117) Scan parquet +(126) Sort +Input [6]: [l_orderkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(127) Scan parquet Output [2]: [o_orderkey#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderkey)] ReadSchema: struct -(118) Filter +(128) Filter Input [2]: [o_orderkey#X, o_orderdate#X] Condition : isnotnull(o_orderkey#X) -(119) Exchange +(129) Exchange Input [2]: [o_orderkey#X, o_orderdate#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(120) ShuffledHashJoin +(130) Sort +Input [2]: [o_orderkey#X, o_orderdate#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(131) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [o_orderkey#X] Join condition: None -(121) Project +(132) ShuffledHashJoinExecTemp +Input [8]: [l_orderkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderkey#X, o_orderdate#X] +Arguments: BuildRight + +(133) Project Output [6]: [l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderdate#X] Input [8]: [l_orderkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderkey#X, o_orderdate#X] -(122) Exchange +(134) Exchange Input [6]: [l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderdate#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(123) Scan parquet +(135) Sort +Input [6]: [l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderdate#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(136) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey)] ReadSchema: struct -(124) Filter +(137) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : isnotnull(n_nationkey#X) -(125) Exchange +(138) Exchange Input [2]: [n_nationkey#X, n_name#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(126) ShuffledHashJoin +(139) Sort +Input [2]: [n_nationkey#X, n_name#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(140) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(127) Project +(141) ShuffledHashJoinExecTemp +Input [8]: [l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderdate#X, n_nationkey#X, n_name#X] +Arguments: BuildRight + +(142) Project Output [3]: [n_name#X AS nation#X, year(o_orderdate#X) AS o_year#X, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)) as decimal(27,4))) - promote_precision(cast(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(l_quantity#X)), DecimalType(25,4)) as decimal(27,4)))), DecimalType(27,4)) AS amount#X] Input [8]: [l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderdate#X, n_nationkey#X, n_name#X] -(128) HashAggregate +(143) HashAggregate Input [3]: [nation#X, o_year#X, amount#X] Keys [2]: [nation#X, o_year#X] Functions [1]: [partial_sum(amount#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [4]: [nation#X, o_year#X, sum#X, isEmpty#X] -(129) Exchange +(144) Exchange Input [4]: [nation#X, o_year#X, sum#X, isEmpty#X] Arguments: hashpartitioning(nation#X, o_year#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(130) HashAggregate +(145) HashAggregate Input [4]: [nation#X, o_year#X, sum#X, isEmpty#X] Keys [2]: [nation#X, o_year#X] Functions [1]: [sum(amount#X)] Aggregate Attributes [1]: [sum(amount#X)#X] Results [3]: [nation#X, o_year#X, sum(amount#X)#X AS sum_profit#X] -(131) Exchange +(146) Exchange Input [3]: [nation#X, o_year#X, sum_profit#X] Arguments: rangepartitioning(nation#X ASC NULLS FIRST, o_year#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(132) Sort +(147) Sort Input [3]: [nation#X, o_year#X, sum_profit#X] Arguments: [nation#X ASC NULLS FIRST, o_year#X DESC NULLS LAST], true, 0 -(133) AdaptiveSparkPlan +(148) AdaptiveSparkPlan Output [3]: [nation#X, o_year#X, sum_profit#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/10.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/10.txt index 02cf374dd013..e40fac76d4fc 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/10.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/10.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (87) +AdaptiveSparkPlan (96) +- == Final Plan == VeloxColumnarToRowExec (60) +- TakeOrderedAndProjectExecTransformer (59) @@ -47,32 +47,41 @@ AdaptiveSparkPlan (87) +- ^ FilterExecTransformer (40) +- ^ Scan parquet (39) +- == Initial Plan == - TakeOrderedAndProject (86) - +- HashAggregate (85) - +- Exchange (84) - +- HashAggregate (83) - +- Project (82) - +- ShuffledHashJoin Inner BuildRight (81) - :- Exchange (77) - : +- Project (76) - : +- ShuffledHashJoin Inner BuildRight (75) - : :- Exchange (70) - : : +- Project (69) - : : +- ShuffledHashJoin Inner BuildRight (68) - : : :- Exchange (63) - : : : +- Filter (62) - : : : +- Scan parquet (61) - : : +- Exchange (67) - : : +- Project (66) - : : +- Filter (65) - : : +- Scan parquet (64) - : +- Exchange (74) - : +- Project (73) - : +- Filter (72) - : +- Scan parquet (71) - +- Exchange (80) - +- Filter (79) - +- Scan parquet (78) + TakeOrderedAndProject (95) + +- HashAggregate (94) + +- Exchange (93) + +- HashAggregate (92) + +- Project (91) + +- ShuffledHashJoinExecTemp (90) + +- SortMergeJoin Inner (89) + :- Sort (84) + : +- Exchange (83) + : +- Project (82) + : +- ShuffledHashJoinExecTemp (81) + : +- SortMergeJoin Inner (80) + : :- Sort (74) + : : +- Exchange (73) + : : +- Project (72) + : : +- ShuffledHashJoinExecTemp (71) + : : +- SortMergeJoin Inner (70) + : : :- Sort (64) + : : : +- Exchange (63) + : : : +- Filter (62) + : : : +- Scan parquet (61) + : : +- Sort (69) + : : +- Exchange (68) + : : +- Project (67) + : : +- Filter (66) + : : +- Scan parquet (65) + : +- Sort (79) + : +- Exchange (78) + : +- Project (77) + : +- Filter (76) + : +- Scan parquet (75) + +- Sort (88) + +- Exchange (87) + +- Filter (86) + +- Scan parquet (85) (1) Scan parquet @@ -339,119 +348,155 @@ Condition : (isnotnull(c_custkey#X) AND isnotnull(c_nationkey#X)) Input [7]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(64) Scan parquet +(64) Sort +Input [7]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(65) Scan parquet Output [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderdate), GreaterThanOrEqual(o_orderdate,1993-10-01), LessThan(o_orderdate,1994-01-01), IsNotNull(o_custkey), IsNotNull(o_orderkey)] ReadSchema: struct -(65) Filter +(66) Filter Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Condition : ((((isnotnull(o_orderdate#X) AND (o_orderdate#X >= 1993-10-01)) AND (o_orderdate#X < 1994-01-01)) AND isnotnull(o_custkey#X)) AND isnotnull(o_orderkey#X)) -(66) Project +(67) Project Output [2]: [o_orderkey#X, o_custkey#X] Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] -(67) Exchange +(68) Exchange Input [2]: [o_orderkey#X, o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(68) ShuffledHashJoin +(69) Sort +Input [2]: [o_orderkey#X, o_custkey#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(70) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join type: Inner Join condition: None -(69) Project +(71) ShuffledHashJoinExecTemp +Input [9]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X, o_custkey#X] +Arguments: BuildRight + +(72) Project Output [8]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X] Input [9]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X, o_custkey#X] -(70) Exchange +(73) Exchange Input [8]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(71) Scan parquet +(74) Sort +Input [8]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(75) Scan parquet Output [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_returnflag#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_returnflag), EqualTo(l_returnflag,R), IsNotNull(l_orderkey)] ReadSchema: struct -(72) Filter +(76) Filter Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_returnflag#X] Condition : ((isnotnull(l_returnflag#X) AND (l_returnflag#X = R)) AND isnotnull(l_orderkey#X)) -(73) Project +(77) Project Output [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_returnflag#X] -(74) Exchange +(78) Exchange Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(75) ShuffledHashJoin +(79) Sort +Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(80) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join type: Inner Join condition: None -(76) Project +(81) ShuffledHashJoinExecTemp +Input [11]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X] +Arguments: BuildRight + +(82) Project Output [9]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X] Input [11]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X] -(77) Exchange +(83) Exchange Input [9]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(c_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(78) Scan parquet +(84) Sort +Input [9]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X] +Arguments: [c_nationkey#X ASC NULLS FIRST], false, 0 + +(85) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey)] ReadSchema: struct -(79) Filter +(86) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : isnotnull(n_nationkey#X) -(80) Exchange +(87) Exchange Input [2]: [n_nationkey#X, n_name#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(81) ShuffledHashJoin +(88) Sort +Input [2]: [n_nationkey#X, n_name#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(89) SortMergeJoin Left keys [1]: [c_nationkey#X] Right keys [1]: [n_nationkey#X] Join type: Inner Join condition: None -(82) Project +(90) ShuffledHashJoinExecTemp +Input [11]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X, n_nationkey#X, n_name#X] +Arguments: BuildRight + +(91) Project Output [9]: [c_custkey#X, c_name#X, c_address#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X, n_name#X] Input [11]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X, n_nationkey#X, n_name#X] -(83) HashAggregate +(92) HashAggregate Input [9]: [c_custkey#X, c_name#X, c_address#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X, n_name#X] Keys [7]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X] Functions [1]: [partial_sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [9]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, sum#X, isEmpty#X] -(84) Exchange +(93) Exchange Input [9]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, sum#X, isEmpty#X] Arguments: hashpartitioning(c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(85) HashAggregate +(94) HashAggregate Input [9]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, sum#X, isEmpty#X] Keys [7]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X] Functions [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X] Results [8]: [c_custkey#X, c_name#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X AS revenue#X, c_acctbal#X, n_name#X, c_address#X, c_phone#X, c_comment#X] -(86) TakeOrderedAndProject +(95) TakeOrderedAndProject Input [8]: [c_custkey#X, c_name#X, revenue#X, c_acctbal#X, n_name#X, c_address#X, c_phone#X, c_comment#X] Arguments: X, [revenue#X DESC NULLS LAST], [c_custkey#X, c_name#X, revenue#X, c_acctbal#X, n_name#X, c_address#X, c_phone#X, c_comment#X] -(87) AdaptiveSparkPlan +(96) AdaptiveSparkPlan Output [8]: [c_custkey#X, c_name#X, revenue#X, c_acctbal#X, n_name#X, c_address#X, c_phone#X, c_comment#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/11.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/11.txt index b14866370aab..bbc9ec7c4cc7 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/11.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/11.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (72) +AdaptiveSparkPlan (78) +- == Final Plan == VeloxColumnarToRowExec (50) +- ^ SortExecTransformer (48) @@ -39,27 +39,33 @@ AdaptiveSparkPlan (72) +- ^ FilterExecTransformer (25) +- ^ Scan parquet (24) +- == Initial Plan == - Sort (71) - +- Exchange (70) - +- Filter (69) - +- HashAggregate (68) - +- Exchange (67) - +- HashAggregate (66) - +- Project (65) - +- ShuffledHashJoin Inner BuildRight (64) - :- Exchange (59) - : +- Project (58) - : +- ShuffledHashJoin Inner BuildRight (57) - : :- Exchange (53) - : : +- Filter (52) - : : +- Scan parquet (51) - : +- Exchange (56) - : +- Filter (55) - : +- Scan parquet (54) - +- Exchange (63) - +- Project (62) - +- Filter (61) - +- Scan parquet (60) + Sort (77) + +- Exchange (76) + +- Filter (75) + +- HashAggregate (74) + +- Exchange (73) + +- HashAggregate (72) + +- Project (71) + +- ShuffledHashJoinExecTemp (70) + +- SortMergeJoin Inner (69) + :- Sort (63) + : +- Exchange (62) + : +- Project (61) + : +- ShuffledHashJoinExecTemp (60) + : +- SortMergeJoin Inner (59) + : :- Sort (54) + : : +- Exchange (53) + : : +- Filter (52) + : : +- Scan parquet (51) + : +- Sort (58) + : +- Exchange (57) + : +- Filter (56) + : +- Scan parquet (55) + +- Sort (68) + +- Exchange (67) + +- Project (66) + +- Filter (65) + +- Scan parquet (64) (1) Scan parquet @@ -283,355 +289,409 @@ Condition : isnotnull(ps_suppkey#X) Input [4]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(54) Scan parquet +(54) Sort +Input [4]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] +Arguments: [ps_suppkey#X ASC NULLS FIRST], false, 0 + +(55) Scan parquet Output [2]: [s_suppkey#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct -(55) Filter +(56) Filter Input [2]: [s_suppkey#X, s_nationkey#X] Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) -(56) Exchange +(57) Exchange Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(57) ShuffledHashJoin +(58) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(59) SortMergeJoin Left keys [1]: [ps_suppkey#X] Right keys [1]: [s_suppkey#X] Join type: Inner Join condition: None -(58) Project +(60) ShuffledHashJoinExecTemp +Input [6]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X, s_suppkey#X, s_nationkey#X] +Arguments: BuildRight + +(61) Project Output [4]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] Input [6]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X, s_suppkey#X, s_nationkey#X] -(59) Exchange +(62) Exchange Input [4]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(60) Scan parquet +(63) Sort +Input [4]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(64) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,GERMANY), IsNotNull(n_nationkey)] ReadSchema: struct -(61) Filter +(65) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = GERMANY)) AND isnotnull(n_nationkey#X)) -(62) Project +(66) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(63) Exchange +(67) Exchange Input [1]: [n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(64) ShuffledHashJoin +(68) Sort +Input [1]: [n_nationkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(69) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join type: Inner Join condition: None -(65) Project +(70) ShuffledHashJoinExecTemp +Input [5]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X, n_nationkey#X] +Arguments: BuildRight + +(71) Project Output [3]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X] Input [5]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X, n_nationkey#X] -(66) HashAggregate +(72) HashAggregate Input [3]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X] Keys [1]: [ps_partkey#X] Functions [1]: [partial_sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [ps_partkey#X, sum#X, isEmpty#X] -(67) Exchange +(73) Exchange Input [3]: [ps_partkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(ps_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(68) HashAggregate +(74) HashAggregate Input [3]: [ps_partkey#X, sum#X, isEmpty#X] Keys [1]: [ps_partkey#X] Functions [1]: [sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))] Aggregate Attributes [1]: [sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))#X] Results [2]: [ps_partkey#X, sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))#X AS value#X] -(69) Filter +(75) Filter Input [2]: [ps_partkey#X, value#X] Condition : (isnotnull(value#X) AND (cast(value#X as decimal(38,6)) > Subquery subquery#X, [id=#X])) -(70) Exchange +(76) Exchange Input [2]: [ps_partkey#X, value#X] Arguments: rangepartitioning(value#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(71) Sort +(77) Sort Input [2]: [ps_partkey#X, value#X] Arguments: [value#X DESC NULLS LAST], true, 0 -(72) AdaptiveSparkPlan +(78) AdaptiveSparkPlan Output [2]: [ps_partkey#X, value#X] Arguments: isFinalPlan=true ===== Subqueries ===== Subquery:1 Hosting operator id = 42 Hosting Expression = Subquery subquery#X, [id=#X] -AdaptiveSparkPlan (120) +AdaptiveSparkPlan (132) +- == Final Plan == - VeloxColumnarToRowExec (102) - +- ^ ProjectExecTransformer (100) - +- ^ RegularHashAggregateExecTransformer (99) - +- ^ RegularHashAggregateExecTransformer (98) - +- ^ ProjectExecTransformer (97) - +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (96) - :- ^ InputIteratorTransformer (91) - : +- ShuffleQueryStage (89), Statistics(X) - : +- ColumnarExchange (88) - : +- ^ ProjectExecTransformer (86) - : +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (85) - : :- ^ InputIteratorTransformer (80) - : : +- ShuffleQueryStage (78), Statistics(X) - : : +- ColumnarExchange (77) - : : +- ^ ProjectExecTransformer (75) - : : +- ^ FilterExecTransformer (74) - : : +- ^ Scan parquet (73) - : +- ^ InputIteratorTransformer (84) - : +- ShuffleQueryStage (82), Statistics(X) - : +- ReusedExchange (81) - +- ^ InputIteratorTransformer (95) - +- ShuffleQueryStage (93), Statistics(X) - +- ReusedExchange (92) + VeloxColumnarToRowExec (108) + +- ^ ProjectExecTransformer (106) + +- ^ RegularHashAggregateExecTransformer (105) + +- ^ RegularHashAggregateExecTransformer (104) + +- ^ ProjectExecTransformer (103) + +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (102) + :- ^ InputIteratorTransformer (97) + : +- ShuffleQueryStage (95), Statistics(X) + : +- ColumnarExchange (94) + : +- ^ ProjectExecTransformer (92) + : +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (91) + : :- ^ InputIteratorTransformer (86) + : : +- ShuffleQueryStage (84), Statistics(X) + : : +- ColumnarExchange (83) + : : +- ^ ProjectExecTransformer (81) + : : +- ^ FilterExecTransformer (80) + : : +- ^ Scan parquet (79) + : +- ^ InputIteratorTransformer (90) + : +- ShuffleQueryStage (88), Statistics(X) + : +- ReusedExchange (87) + +- ^ InputIteratorTransformer (101) + +- ShuffleQueryStage (99), Statistics(X) + +- ReusedExchange (98) +- == Initial Plan == - HashAggregate (119) - +- HashAggregate (118) - +- Project (117) - +- ShuffledHashJoin Inner BuildRight (116) - :- Exchange (111) - : +- Project (110) - : +- ShuffledHashJoin Inner BuildRight (109) - : :- Exchange (105) - : : +- Filter (104) - : : +- Scan parquet (103) - : +- Exchange (108) - : +- Filter (107) - : +- Scan parquet (106) - +- Exchange (115) - +- Project (114) - +- Filter (113) - +- Scan parquet (112) - - -(73) Scan parquet + HashAggregate (131) + +- HashAggregate (130) + +- Project (129) + +- ShuffledHashJoinExecTemp (128) + +- SortMergeJoin Inner (127) + :- Sort (121) + : +- Exchange (120) + : +- Project (119) + : +- ShuffledHashJoinExecTemp (118) + : +- SortMergeJoin Inner (117) + : :- Sort (112) + : : +- Exchange (111) + : : +- Filter (110) + : : +- Scan parquet (109) + : +- Sort (116) + : +- Exchange (115) + : +- Filter (114) + : +- Scan parquet (113) + +- Sort (126) + +- Exchange (125) + +- Project (124) + +- Filter (123) + +- Scan parquet (122) + + +(79) Scan parquet Output [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(ps_suppkey)] ReadSchema: struct -(74) FilterExecTransformer +(80) FilterExecTransformer Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: isnotnull(ps_suppkey#X) -(75) ProjectExecTransformer +(81) ProjectExecTransformer Output [4]: [hash(ps_suppkey#X, 42) AS hash_partition_key#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] -(76) WholeStageCodegenTransformer (X) +(82) WholeStageCodegenTransformer (X) Input [4]: [hash_partition_key#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: false -(77) ColumnarExchange +(83) ColumnarExchange Input [4]: [hash_partition_key#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [ps_suppkey#X, ps_availqty#X, ps_supplycost#X], [plan_id=X], [id=#X] -(78) ShuffleQueryStage +(84) ShuffleQueryStage Output [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: X -(79) InputAdapter +(85) InputAdapter Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] -(80) InputIteratorTransformer +(86) InputIteratorTransformer Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] -(81) ReusedExchange [Reuses operator id: 13] +(87) ReusedExchange [Reuses operator id: 13] Output [2]: [s_suppkey#X, s_nationkey#X] -(82) ShuffleQueryStage +(88) ShuffleQueryStage Output [2]: [s_suppkey#X, s_nationkey#X] Arguments: X -(83) InputAdapter +(89) InputAdapter Input [2]: [s_suppkey#X, s_nationkey#X] -(84) InputIteratorTransformer +(90) InputIteratorTransformer Input [2]: [s_suppkey#X, s_nationkey#X] -(85) ShuffledHashJoinExecTransformer +(91) ShuffledHashJoinExecTransformer Left keys [1]: [ps_suppkey#X] Right keys [1]: [s_suppkey#X] Join type: Inner Join condition: None -(86) ProjectExecTransformer +(92) ProjectExecTransformer Output [4]: [hash(s_nationkey#X, 42) AS hash_partition_key#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] Input [5]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X, s_suppkey#X, s_nationkey#X] -(87) WholeStageCodegenTransformer (X) +(93) WholeStageCodegenTransformer (X) Input [4]: [hash_partition_key#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] Arguments: false -(88) ColumnarExchange +(94) ColumnarExchange Input [4]: [hash_partition_key#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [ps_availqty#X, ps_supplycost#X, s_nationkey#X], [plan_id=X], [id=#X] -(89) ShuffleQueryStage +(95) ShuffleQueryStage Output [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] Arguments: X -(90) InputAdapter +(96) InputAdapter Input [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] -(91) InputIteratorTransformer +(97) InputIteratorTransformer Input [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] -(92) ReusedExchange [Reuses operator id: 28] +(98) ReusedExchange [Reuses operator id: 28] Output [1]: [n_nationkey#X] -(93) ShuffleQueryStage +(99) ShuffleQueryStage Output [1]: [n_nationkey#X] Arguments: X -(94) InputAdapter +(100) InputAdapter Input [1]: [n_nationkey#X] -(95) InputIteratorTransformer +(101) InputIteratorTransformer Input [1]: [n_nationkey#X] -(96) ShuffledHashJoinExecTransformer +(102) ShuffledHashJoinExecTransformer Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join type: Inner Join condition: None -(97) ProjectExecTransformer +(103) ProjectExecTransformer Output [3]: [ps_availqty#X, ps_supplycost#X, (ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))) AS _pre_X#X] Input [4]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X, n_nationkey#X] -(98) RegularHashAggregateExecTransformer +(104) RegularHashAggregateExecTransformer Input [3]: [ps_availqty#X, ps_supplycost#X, _pre_X#X] Keys: [] Functions [1]: [partial_sum(_pre_X#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [2]: [sum#X, isEmpty#X] -(99) RegularHashAggregateExecTransformer +(105) RegularHashAggregateExecTransformer Input [2]: [sum#X, isEmpty#X] Keys: [] Functions [1]: [sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))] Aggregate Attributes [1]: [sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))#X] Results [1]: [sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))#X] -(100) ProjectExecTransformer +(106) ProjectExecTransformer Output [1]: [(sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))#X * 0.0001000000) AS (sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] Input [1]: [sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))#X] -(101) WholeStageCodegenTransformer (X) +(107) WholeStageCodegenTransformer (X) Input [1]: [(sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] Arguments: false -(102) VeloxColumnarToRowExec +(108) VeloxColumnarToRowExec Input [1]: [(sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] -(103) Scan parquet +(109) Scan parquet Output [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(ps_suppkey)] ReadSchema: struct -(104) Filter +(110) Filter Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Condition : isnotnull(ps_suppkey#X) -(105) Exchange +(111) Exchange Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(106) Scan parquet +(112) Sort +Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] +Arguments: [ps_suppkey#X ASC NULLS FIRST], false, 0 + +(113) Scan parquet Output [2]: [s_suppkey#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct -(107) Filter +(114) Filter Input [2]: [s_suppkey#X, s_nationkey#X] Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) -(108) Exchange +(115) Exchange Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(109) ShuffledHashJoin +(116) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(117) SortMergeJoin Left keys [1]: [ps_suppkey#X] Right keys [1]: [s_suppkey#X] Join type: Inner Join condition: None -(110) Project +(118) ShuffledHashJoinExecTemp +Input [5]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X, s_suppkey#X, s_nationkey#X] +Arguments: BuildRight + +(119) Project Output [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] Input [5]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X, s_suppkey#X, s_nationkey#X] -(111) Exchange +(120) Exchange Input [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(112) Scan parquet +(121) Sort +Input [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(122) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,GERMANY), IsNotNull(n_nationkey)] ReadSchema: struct -(113) Filter +(123) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = GERMANY)) AND isnotnull(n_nationkey#X)) -(114) Project +(124) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(115) Exchange +(125) Exchange Input [1]: [n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(116) ShuffledHashJoin +(126) Sort +Input [1]: [n_nationkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(127) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join type: Inner Join condition: None -(117) Project +(128) ShuffledHashJoinExecTemp +Input [4]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X, n_nationkey#X] +Arguments: BuildRight + +(129) Project Output [2]: [ps_availqty#X, ps_supplycost#X] Input [4]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X, n_nationkey#X] -(118) HashAggregate +(130) HashAggregate Input [2]: [ps_availqty#X, ps_supplycost#X] Keys: [] Functions [1]: [partial_sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [2]: [sum#X, isEmpty#X] -(119) HashAggregate +(131) HashAggregate Input [2]: [sum#X, isEmpty#X] Keys: [] Functions [1]: [sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))] Aggregate Attributes [1]: [sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))#X] Results [1]: [(sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))#X * 0.0001000000) AS (sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] -(120) AdaptiveSparkPlan +(132) AdaptiveSparkPlan Output [1]: [(sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/12.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/12.txt index 27765da815dd..7c29b655dee2 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/12.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/12.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (49) +AdaptiveSparkPlan (52) +- == Final Plan == VeloxColumnarToRowExec (34) +- ^ SortExecTransformer (32) @@ -27,20 +27,23 @@ AdaptiveSparkPlan (49) +- ^ FilterExecTransformer (10) +- ^ Scan parquet (9) +- == Initial Plan == - Sort (48) - +- Exchange (47) - +- HashAggregate (46) - +- Exchange (45) - +- HashAggregate (44) - +- Project (43) - +- ShuffledHashJoin Inner BuildLeft (42) - :- Exchange (37) - : +- Filter (36) - : +- Scan parquet (35) - +- Exchange (41) - +- Project (40) - +- Filter (39) - +- Scan parquet (38) + Sort (51) + +- Exchange (50) + +- HashAggregate (49) + +- Exchange (48) + +- HashAggregate (47) + +- Project (46) + +- ShuffledHashJoinExecTemp (45) + +- SortMergeJoin Inner (44) + :- Sort (38) + : +- Exchange (37) + : +- Filter (36) + : +- Scan parquet (35) + +- Sort (43) + +- Exchange (42) + +- Project (41) + +- Filter (40) + +- Scan parquet (39) (1) Scan parquet @@ -199,61 +202,73 @@ Condition : isnotnull(o_orderkey#X) Input [2]: [o_orderkey#X, o_orderpriority#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(38) Scan parquet +(38) Sort +Input [2]: [o_orderkey#X, o_orderpriority#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(39) Scan parquet Output [5]: [l_orderkey#X, l_shipdate#X, l_commitdate#X, l_receiptdate#X, l_shipmode#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_commitdate), IsNotNull(l_receiptdate), IsNotNull(l_shipdate), In(l_shipmode, [MAIL,SHIP]), GreaterThanOrEqual(l_receiptdate,1994-01-01), LessThan(l_receiptdate,1995-01-01), IsNotNull(l_orderkey)] ReadSchema: struct -(39) Filter +(40) Filter Input [5]: [l_orderkey#X, l_shipdate#X, l_commitdate#X, l_receiptdate#X, l_shipmode#X] Condition : ((((((((isnotnull(l_commitdate#X) AND isnotnull(l_receiptdate#X)) AND isnotnull(l_shipdate#X)) AND l_shipmode#X IN (MAIL,SHIP)) AND (l_commitdate#X < l_receiptdate#X)) AND (l_shipdate#X < l_commitdate#X)) AND (l_receiptdate#X >= 1994-01-01)) AND (l_receiptdate#X < 1995-01-01)) AND isnotnull(l_orderkey#X)) -(40) Project +(41) Project Output [2]: [l_orderkey#X, l_shipmode#X] Input [5]: [l_orderkey#X, l_shipdate#X, l_commitdate#X, l_receiptdate#X, l_shipmode#X] -(41) Exchange +(42) Exchange Input [2]: [l_orderkey#X, l_shipmode#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(42) ShuffledHashJoin +(43) Sort +Input [2]: [l_orderkey#X, l_shipmode#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(44) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join type: Inner Join condition: None -(43) Project +(45) ShuffledHashJoinExecTemp +Input [4]: [o_orderkey#X, o_orderpriority#X, l_orderkey#X, l_shipmode#X] +Arguments: BuildLeft + +(46) Project Output [2]: [o_orderpriority#X, l_shipmode#X] Input [4]: [o_orderkey#X, o_orderpriority#X, l_orderkey#X, l_shipmode#X] -(44) HashAggregate +(47) HashAggregate Input [2]: [o_orderpriority#X, l_shipmode#X] Keys [1]: [l_shipmode#X] Functions [2]: [partial_sum(CASE WHEN ((o_orderpriority#X = 1-URGENT) OR (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (NOT (o_orderpriority#X = 1-URGENT) AND NOT (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)] Aggregate Attributes [2]: [sum#X, sum#X] Results [3]: [l_shipmode#X, sum#X, sum#X] -(45) Exchange +(48) Exchange Input [3]: [l_shipmode#X, sum#X, sum#X] Arguments: hashpartitioning(l_shipmode#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(46) HashAggregate +(49) HashAggregate Input [3]: [l_shipmode#X, sum#X, sum#X] Keys [1]: [l_shipmode#X] Functions [2]: [sum(CASE WHEN ((o_orderpriority#X = 1-URGENT) OR (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END), sum(CASE WHEN (NOT (o_orderpriority#X = 1-URGENT) AND NOT (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)] Aggregate Attributes [2]: [sum(CASE WHEN ((o_orderpriority#X = 1-URGENT) OR (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)#X, sum(CASE WHEN (NOT (o_orderpriority#X = 1-URGENT) AND NOT (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)#X] Results [3]: [l_shipmode#X, sum(CASE WHEN ((o_orderpriority#X = 1-URGENT) OR (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)#X AS high_line_count#X, sum(CASE WHEN (NOT (o_orderpriority#X = 1-URGENT) AND NOT (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)#X AS low_line_count#X] -(47) Exchange +(50) Exchange Input [3]: [l_shipmode#X, high_line_count#X, low_line_count#X] Arguments: rangepartitioning(l_shipmode#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(48) Sort +(51) Sort Input [3]: [l_shipmode#X, high_line_count#X, low_line_count#X] Arguments: [l_shipmode#X ASC NULLS FIRST], true, 0 -(49) AdaptiveSparkPlan +(52) AdaptiveSparkPlan Output [3]: [l_shipmode#X, high_line_count#X, low_line_count#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/13.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/13.txt index 79b9dcd18bd1..a7d240f50230 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/13.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/13.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (52) +AdaptiveSparkPlan (55) +- == Final Plan == VeloxColumnarToRowExec (36) +- ^ SortExecTransformer (34) @@ -29,21 +29,24 @@ AdaptiveSparkPlan (52) +- ^ FilterExecTransformer (9) +- ^ Scan parquet (8) +- == Initial Plan == - Sort (51) - +- Exchange (50) - +- HashAggregate (49) - +- Exchange (48) - +- HashAggregate (47) - +- HashAggregate (46) - +- HashAggregate (45) - +- Project (44) - +- ShuffledHashJoin LeftOuter BuildRight (43) - :- Exchange (38) - : +- Scan parquet (37) - +- Exchange (42) - +- Project (41) - +- Filter (40) - +- Scan parquet (39) + Sort (54) + +- Exchange (53) + +- HashAggregate (52) + +- Exchange (51) + +- HashAggregate (50) + +- HashAggregate (49) + +- HashAggregate (48) + +- Project (47) + +- ShuffledHashJoinExecTemp (46) + +- SortMergeJoin LeftOuter (45) + :- Sort (39) + : +- Exchange (38) + : +- Scan parquet (37) + +- Sort (44) + +- Exchange (43) + +- Project (42) + +- Filter (41) + +- Scan parquet (40) (1) Scan parquet @@ -210,75 +213,87 @@ ReadSchema: struct Input [1]: [c_custkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(39) Scan parquet +(39) Sort +Input [1]: [c_custkey#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(40) Scan parquet Output [3]: [o_orderkey#X, o_custkey#X, o_comment#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_comment), IsNotNull(o_custkey)] ReadSchema: struct -(40) Filter +(41) Filter Input [3]: [o_orderkey#X, o_custkey#X, o_comment#X] Condition : ((isnotnull(o_comment#X) AND NOT o_comment#X LIKE %special%requests%) AND isnotnull(o_custkey#X)) -(41) Project +(42) Project Output [2]: [o_orderkey#X, o_custkey#X] Input [3]: [o_orderkey#X, o_custkey#X, o_comment#X] -(42) Exchange +(43) Exchange Input [2]: [o_orderkey#X, o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(43) ShuffledHashJoin +(44) Sort +Input [2]: [o_orderkey#X, o_custkey#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(45) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join type: LeftOuter Join condition: None -(44) Project +(46) ShuffledHashJoinExecTemp +Input [3]: [c_custkey#X, o_orderkey#X, o_custkey#X] +Arguments: BuildLeft + +(47) Project Output [2]: [c_custkey#X, o_orderkey#X] Input [3]: [c_custkey#X, o_orderkey#X, o_custkey#X] -(45) HashAggregate +(48) HashAggregate Input [2]: [c_custkey#X, o_orderkey#X] Keys [1]: [c_custkey#X] Functions [1]: [partial_count(o_orderkey#X)] Aggregate Attributes [1]: [count#X] Results [2]: [c_custkey#X, count#X] -(46) HashAggregate +(49) HashAggregate Input [2]: [c_custkey#X, count#X] Keys [1]: [c_custkey#X] Functions [1]: [count(o_orderkey#X)] Aggregate Attributes [1]: [count(o_orderkey#X)#X] Results [1]: [count(o_orderkey#X)#X AS c_count#X] -(47) HashAggregate +(50) HashAggregate Input [1]: [c_count#X] Keys [1]: [c_count#X] Functions [1]: [partial_count(1)] Aggregate Attributes [1]: [count#X] Results [2]: [c_count#X, count#X] -(48) Exchange +(51) Exchange Input [2]: [c_count#X, count#X] Arguments: hashpartitioning(c_count#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(49) HashAggregate +(52) HashAggregate Input [2]: [c_count#X, count#X] Keys [1]: [c_count#X] Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#X] Results [2]: [c_count#X, count(1)#X AS custdist#X] -(50) Exchange +(53) Exchange Input [2]: [c_count#X, custdist#X] Arguments: rangepartitioning(custdist#X DESC NULLS LAST, c_count#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(51) Sort +(54) Sort Input [2]: [c_count#X, custdist#X] Arguments: [custdist#X DESC NULLS LAST, c_count#X DESC NULLS LAST], true, 0 -(52) AdaptiveSparkPlan +(55) AdaptiveSparkPlan Output [2]: [c_count#X, custdist#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/14.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/14.txt index 4db67eada562..d7bfe9f15264 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/14.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/14.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (35) +AdaptiveSparkPlan (38) +- == Final Plan == VeloxColumnarToRowExec (23) +- ^ ProjectExecTransformer (21) @@ -20,17 +20,20 @@ AdaptiveSparkPlan (35) +- ^ FilterExecTransformer (10) +- ^ Scan parquet (9) +- == Initial Plan == - HashAggregate (34) - +- HashAggregate (33) - +- Project (32) - +- ShuffledHashJoin Inner BuildRight (31) - :- Exchange (27) - : +- Project (26) - : +- Filter (25) - : +- Scan parquet (24) - +- Exchange (30) - +- Filter (29) - +- Scan parquet (28) + HashAggregate (37) + +- HashAggregate (36) + +- Project (35) + +- ShuffledHashJoinExecTemp (34) + +- SortMergeJoin Inner (33) + :- Sort (28) + : +- Exchange (27) + : +- Project (26) + : +- Filter (25) + : +- Scan parquet (24) + +- Sort (32) + +- Exchange (31) + +- Filter (30) + +- Scan parquet (29) (1) Scan parquet @@ -153,45 +156,57 @@ Input [4]: [l_partkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Input [3]: [l_partkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(28) Scan parquet +(28) Sort +Input [3]: [l_partkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(29) Scan parquet Output [2]: [p_partkey#X, p_type#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_partkey)] ReadSchema: struct -(29) Filter +(30) Filter Input [2]: [p_partkey#X, p_type#X] Condition : isnotnull(p_partkey#X) -(30) Exchange +(31) Exchange Input [2]: [p_partkey#X, p_type#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(31) ShuffledHashJoin +(32) Sort +Input [2]: [p_partkey#X, p_type#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(33) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join type: Inner Join condition: None -(32) Project +(34) ShuffledHashJoinExecTemp +Input [5]: [l_partkey#X, l_extendedprice#X, l_discount#X, p_partkey#X, p_type#X] +Arguments: BuildRight + +(35) Project Output [3]: [l_extendedprice#X, l_discount#X, p_type#X] Input [5]: [l_partkey#X, l_extendedprice#X, l_discount#X, p_partkey#X, p_type#X] -(33) HashAggregate +(36) HashAggregate Input [3]: [l_extendedprice#X, l_discount#X, p_type#X] Keys: [] Functions [2]: [partial_sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN (l_extendedprice#X * (1 - l_discount#X)) ELSE 0.0000 END), partial_sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] Results [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] -(34) HashAggregate +(37) HashAggregate Input [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] Keys: [] Functions [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN (l_extendedprice#X * (1 - l_discount#X)) ELSE 0.0000 END), sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN (l_extendedprice#X * (1 - l_discount#X)) ELSE 0.0000 END)#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X] Results [1]: [((100.00 * sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN (l_extendedprice#X * (1 - l_discount#X)) ELSE 0.0000 END)#X) / sum((l_extendedprice#X * (1 - l_discount#X)))#X) AS promo_revenue#X] -(35) AdaptiveSparkPlan +(38) AdaptiveSparkPlan Output [1]: [promo_revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/15.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/15.txt index c8f4e2c84ac3..aca570cdc553 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/15.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/15.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (42) +AdaptiveSparkPlan (45) +- == Final Plan == VeloxColumnarToRowExec (27) +- AQEShuffleRead (26) @@ -24,20 +24,23 @@ AdaptiveSparkPlan (42) +- ^ FilterExecTransformer (10) +- ^ Scan parquet (9) +- == Initial Plan == - Sort (41) - +- Exchange (40) - +- Project (39) - +- ShuffledHashJoin Inner BuildLeft (38) - :- Exchange (30) - : +- Filter (29) - : +- Scan parquet (28) - +- Filter (37) - +- HashAggregate (36) - +- Exchange (35) - +- HashAggregate (34) - +- Project (33) - +- Filter (32) - +- Scan parquet (31) + Sort (44) + +- Exchange (43) + +- Project (42) + +- ShuffledHashJoinExecTemp (41) + +- SortMergeJoin Inner (40) + :- Sort (31) + : +- Exchange (30) + : +- Filter (29) + : +- Scan parquet (28) + +- Sort (39) + +- Filter (38) + +- HashAggregate (37) + +- Exchange (36) + +- HashAggregate (35) + +- Project (34) + +- Filter (33) + +- Scan parquet (32) (1) Scan parquet @@ -172,217 +175,229 @@ Condition : isnotnull(s_suppkey#X) Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(31) Scan parquet +(31) Sort +Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(32) Scan parquet Output [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1996-01-01), LessThan(l_shipdate,1996-04-01), IsNotNull(l_suppkey)] ReadSchema: struct -(32) Filter +(33) Filter Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : (((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1996-01-01)) AND (l_shipdate#X < 1996-04-01)) AND isnotnull(l_suppkey#X)) -(33) Project +(34) Project Output [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(34) HashAggregate +(35) HashAggregate Input [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] Keys [1]: [l_suppkey#X] Functions [1]: [partial_sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_suppkey#X, sum#X, isEmpty#X] -(35) Exchange +(36) Exchange Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(36) HashAggregate +(37) HashAggregate Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Keys [1]: [l_suppkey#X] Functions [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X] Results [2]: [l_suppkey#X AS supplier_no#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X AS total_revenue#X] -(37) Filter +(38) Filter Input [2]: [supplier_no#X, total_revenue#X] Condition : (isnotnull(total_revenue#X) AND (total_revenue#X = Subquery subquery#X, [id=#X])) -(38) ShuffledHashJoin +(39) Sort +Input [2]: [supplier_no#X, total_revenue#X] +Arguments: [supplier_no#X ASC NULLS FIRST], false, 0 + +(40) SortMergeJoin Left keys [1]: [s_suppkey#X] Right keys [1]: [supplier_no#X] Join type: Inner Join condition: None -(39) Project +(41) ShuffledHashJoinExecTemp +Input [6]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, supplier_no#X, total_revenue#X] +Arguments: BuildLeft + +(42) Project Output [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] Input [6]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, supplier_no#X, total_revenue#X] -(40) Exchange +(43) Exchange Input [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] Arguments: rangepartitioning(s_suppkey#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(41) Sort +(44) Sort Input [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] Arguments: [s_suppkey#X ASC NULLS FIRST], true, 0 -(42) AdaptiveSparkPlan +(45) AdaptiveSparkPlan Output [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] Arguments: isFinalPlan=true ===== Subqueries ===== Subquery:1 Hosting operator id = 20 Hosting Expression = Subquery subquery#X, [id=#X] -AdaptiveSparkPlan (67) +AdaptiveSparkPlan (70) +- == Final Plan == - VeloxColumnarToRowExec (58) - +- ^ RegularHashAggregateExecTransformer (56) - +- ^ RegularHashAggregateExecTransformer (55) - +- ^ ProjectExecTransformer (54) - +- ^ RegularHashAggregateExecTransformer (53) - +- ^ InputIteratorTransformer (52) - +- ShuffleQueryStage (50), Statistics(X) - +- ColumnarExchange (49) - +- ^ ProjectExecTransformer (47) - +- ^ FlushableHashAggregateExecTransformer (46) - +- ^ ProjectExecTransformer (45) - +- ^ FilterExecTransformer (44) - +- ^ Scan parquet (43) + VeloxColumnarToRowExec (61) + +- ^ RegularHashAggregateExecTransformer (59) + +- ^ RegularHashAggregateExecTransformer (58) + +- ^ ProjectExecTransformer (57) + +- ^ RegularHashAggregateExecTransformer (56) + +- ^ InputIteratorTransformer (55) + +- ShuffleQueryStage (53), Statistics(X) + +- ColumnarExchange (52) + +- ^ ProjectExecTransformer (50) + +- ^ FlushableHashAggregateExecTransformer (49) + +- ^ ProjectExecTransformer (48) + +- ^ FilterExecTransformer (47) + +- ^ Scan parquet (46) +- == Initial Plan == - HashAggregate (66) - +- HashAggregate (65) - +- HashAggregate (64) - +- Exchange (63) - +- HashAggregate (62) - +- Project (61) - +- Filter (60) - +- Scan parquet (59) + HashAggregate (69) + +- HashAggregate (68) + +- HashAggregate (67) + +- Exchange (66) + +- HashAggregate (65) + +- Project (64) + +- Filter (63) + +- Scan parquet (62) -(43) Scan parquet +(46) Scan parquet Output [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1996-01-01), LessThan(l_shipdate,1996-04-01)] ReadSchema: struct -(44) FilterExecTransformer +(47) FilterExecTransformer Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Arguments: ((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1996-01-01)) AND (l_shipdate#X < 1996-04-01)) -(45) ProjectExecTransformer +(48) ProjectExecTransformer Output [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, (l_extendedprice#X * (1 - l_discount#X)) AS _pre_X#X] Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(46) FlushableHashAggregateExecTransformer +(49) FlushableHashAggregateExecTransformer Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, _pre_X#X] Keys [1]: [l_suppkey#X] Functions [1]: [partial_sum(_pre_X#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_suppkey#X, sum#X, isEmpty#X] -(47) ProjectExecTransformer +(50) ProjectExecTransformer Output [4]: [hash(l_suppkey#X, 42) AS hash_partition_key#X, l_suppkey#X, sum#X, isEmpty#X] Input [3]: [l_suppkey#X, sum#X, isEmpty#X] -(48) WholeStageCodegenTransformer (X) +(51) WholeStageCodegenTransformer (X) Input [4]: [hash_partition_key#X, l_suppkey#X, sum#X, isEmpty#X] Arguments: false -(49) ColumnarExchange +(52) ColumnarExchange Input [4]: [hash_partition_key#X, l_suppkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [l_suppkey#X, sum#X, isEmpty#X], [plan_id=X], [id=#X] -(50) ShuffleQueryStage +(53) ShuffleQueryStage Output [3]: [l_suppkey#X, sum#X, isEmpty#X] Arguments: X -(51) InputAdapter +(54) InputAdapter Input [3]: [l_suppkey#X, sum#X, isEmpty#X] -(52) InputIteratorTransformer +(55) InputIteratorTransformer Input [3]: [l_suppkey#X, sum#X, isEmpty#X] -(53) RegularHashAggregateExecTransformer +(56) RegularHashAggregateExecTransformer Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Keys [1]: [l_suppkey#X] Functions [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X] Results [2]: [l_suppkey#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X] -(54) ProjectExecTransformer +(57) ProjectExecTransformer Output [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X AS total_revenue#X] Input [2]: [l_suppkey#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X] -(55) RegularHashAggregateExecTransformer +(58) RegularHashAggregateExecTransformer Input [1]: [total_revenue#X] Keys: [] Functions [1]: [partial_max(total_revenue#X)] Aggregate Attributes [1]: [max#X] Results [1]: [max#X] -(56) RegularHashAggregateExecTransformer +(59) RegularHashAggregateExecTransformer Input [1]: [max#X] Keys: [] Functions [1]: [max(total_revenue#X)] Aggregate Attributes [1]: [max(total_revenue#X)#X] Results [1]: [max(total_revenue#X)#X AS max(total_revenue)#X] -(57) WholeStageCodegenTransformer (X) +(60) WholeStageCodegenTransformer (X) Input [1]: [max(total_revenue)#X] Arguments: false -(58) VeloxColumnarToRowExec +(61) VeloxColumnarToRowExec Input [1]: [max(total_revenue)#X] -(59) Scan parquet +(62) Scan parquet Output [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1996-01-01), LessThan(l_shipdate,1996-04-01)] ReadSchema: struct -(60) Filter +(63) Filter Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : ((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1996-01-01)) AND (l_shipdate#X < 1996-04-01)) -(61) Project +(64) Project Output [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(62) HashAggregate +(65) HashAggregate Input [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] Keys [1]: [l_suppkey#X] Functions [1]: [partial_sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_suppkey#X, sum#X, isEmpty#X] -(63) Exchange +(66) Exchange Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(64) HashAggregate +(67) HashAggregate Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Keys [1]: [l_suppkey#X] Functions [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X] Results [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X AS total_revenue#X] -(65) HashAggregate +(68) HashAggregate Input [1]: [total_revenue#X] Keys: [] Functions [1]: [partial_max(total_revenue#X)] Aggregate Attributes [1]: [max#X] Results [1]: [max#X] -(66) HashAggregate +(69) HashAggregate Input [1]: [max#X] Keys: [] Functions [1]: [max(total_revenue#X)] Aggregate Attributes [1]: [max(total_revenue#X)#X] Results [1]: [max(total_revenue#X)#X AS max(total_revenue)#X] -(67) AdaptiveSparkPlan +(70) AdaptiveSparkPlan Output [1]: [max(total_revenue)#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/16.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/16.txt index 247b853fb1c4..a9d11a39417a 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/16.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/16.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (64) +AdaptiveSparkPlan (67) +- == Final Plan == VeloxColumnarToRowExec (42) +- ^ SortExecTransformer (40) @@ -33,27 +33,30 @@ AdaptiveSparkPlan (64) +- ^ FilterExecTransformer (10) +- ^ Scan parquet (9) +- == Initial Plan == - Sort (63) - +- Exchange (62) - +- HashAggregate (61) - +- Exchange (60) - +- HashAggregate (59) - +- HashAggregate (58) - +- Exchange (57) - +- HashAggregate (56) - +- Project (55) - +- ShuffledHashJoin Inner BuildRight (54) - :- Exchange (50) - : +- BroadcastHashJoin LeftAnti BuildRight (49) - : :- Filter (44) - : : +- Scan parquet (43) - : +- BroadcastExchange (48) - : +- Project (47) - : +- Filter (46) - : +- Scan parquet (45) - +- Exchange (53) - +- Filter (52) - +- Scan parquet (51) + Sort (66) + +- Exchange (65) + +- HashAggregate (64) + +- Exchange (63) + +- HashAggregate (62) + +- HashAggregate (61) + +- Exchange (60) + +- HashAggregate (59) + +- Project (58) + +- ShuffledHashJoinExecTemp (57) + +- SortMergeJoin Inner (56) + :- Sort (51) + : +- Exchange (50) + : +- BroadcastHashJoin LeftAnti BuildRight (49) + : :- Filter (44) + : : +- Scan parquet (43) + : +- BroadcastExchange (48) + : +- Project (47) + : +- Filter (46) + : +- Scan parquet (45) + +- Sort (55) + +- Exchange (54) + +- Filter (53) + +- Scan parquet (52) (1) Scan parquet @@ -273,75 +276,87 @@ Join condition: None Input [2]: [ps_partkey#X, ps_suppkey#X] Arguments: hashpartitioning(ps_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(51) Scan parquet +(51) Sort +Input [2]: [ps_partkey#X, ps_suppkey#X] +Arguments: [ps_partkey#X ASC NULLS FIRST], false, 0 + +(52) Scan parquet Output [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_brand), IsNotNull(p_type), Not(EqualTo(p_brand,Brand#X)), Not(StringStartsWith(p_type,MEDIUM POLISHED)), In(p_size, [14,19,23,3,36,45,49,9]), IsNotNull(p_partkey)] ReadSchema: struct -(52) Filter +(53) Filter Input [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] Condition : (((((isnotnull(p_brand#X) AND isnotnull(p_type#X)) AND NOT (p_brand#X = Brand#X)) AND NOT StartsWith(p_type#X, MEDIUM POLISHED)) AND p_size#X IN (49,14,23,45,19,3,36,9)) AND isnotnull(p_partkey#X)) -(53) Exchange +(54) Exchange Input [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(54) ShuffledHashJoin +(55) Sort +Input [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(56) SortMergeJoin Left keys [1]: [ps_partkey#X] Right keys [1]: [p_partkey#X] Join type: Inner Join condition: None -(55) Project +(57) ShuffledHashJoinExecTemp +Input [6]: [ps_partkey#X, ps_suppkey#X, p_partkey#X, p_brand#X, p_type#X, p_size#X] +Arguments: BuildRight + +(58) Project Output [4]: [ps_suppkey#X, p_brand#X, p_type#X, p_size#X] Input [6]: [ps_partkey#X, ps_suppkey#X, p_partkey#X, p_brand#X, p_type#X, p_size#X] -(56) HashAggregate +(59) HashAggregate Input [4]: [ps_suppkey#X, p_brand#X, p_type#X, p_size#X] Keys [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] Functions: [] Aggregate Attributes: [] Results [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] -(57) Exchange +(60) Exchange Input [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] Arguments: hashpartitioning(p_brand#X, p_type#X, p_size#X, ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(58) HashAggregate +(61) HashAggregate Input [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] Keys [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] Functions: [] Aggregate Attributes: [] Results [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] -(59) HashAggregate +(62) HashAggregate Input [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] Keys [3]: [p_brand#X, p_type#X, p_size#X] Functions [1]: [partial_count(distinct ps_suppkey#X)] Aggregate Attributes [1]: [count(ps_suppkey#X)#X] Results [4]: [p_brand#X, p_type#X, p_size#X, count#X] -(60) Exchange +(63) Exchange Input [4]: [p_brand#X, p_type#X, p_size#X, count#X] Arguments: hashpartitioning(p_brand#X, p_type#X, p_size#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(61) HashAggregate +(64) HashAggregate Input [4]: [p_brand#X, p_type#X, p_size#X, count#X] Keys [3]: [p_brand#X, p_type#X, p_size#X] Functions [1]: [count(distinct ps_suppkey#X)] Aggregate Attributes [1]: [count(ps_suppkey#X)#X] Results [4]: [p_brand#X, p_type#X, p_size#X, count(ps_suppkey#X)#X AS supplier_cnt#X] -(62) Exchange +(65) Exchange Input [4]: [p_brand#X, p_type#X, p_size#X, supplier_cnt#X] Arguments: rangepartitioning(supplier_cnt#X DESC NULLS LAST, p_brand#X ASC NULLS FIRST, p_type#X ASC NULLS FIRST, p_size#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(63) Sort +(66) Sort Input [4]: [p_brand#X, p_type#X, p_size#X, supplier_cnt#X] Arguments: [supplier_cnt#X DESC NULLS LAST, p_brand#X ASC NULLS FIRST, p_type#X ASC NULLS FIRST, p_size#X ASC NULLS FIRST], true, 0 -(64) AdaptiveSparkPlan +(67) AdaptiveSparkPlan Output [4]: [p_brand#X, p_type#X, p_size#X, supplier_cnt#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/17.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/17.txt index fad48184fed1..3c5b796e94c1 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/17.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/17.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (57) +AdaptiveSparkPlan (63) +- == Final Plan == VeloxColumnarToRowExec (37) +- ^ ProjectExecTransformer (35) @@ -32,25 +32,31 @@ AdaptiveSparkPlan (57) +- ^ FilterExecTransformer (20) +- ^ Scan parquet (19) +- == Initial Plan == - HashAggregate (56) - +- HashAggregate (55) - +- Project (54) - +- ShuffledHashJoin Inner BuildRight (53) - :- Project (46) - : +- ShuffledHashJoin Inner BuildRight (45) - : :- Exchange (40) - : : +- Filter (39) - : : +- Scan parquet (38) - : +- Exchange (44) - : +- Project (43) - : +- Filter (42) - : +- Scan parquet (41) - +- Filter (52) - +- HashAggregate (51) - +- Exchange (50) - +- HashAggregate (49) - +- Filter (48) - +- Scan parquet (47) + HashAggregate (62) + +- HashAggregate (61) + +- Project (60) + +- ShuffledHashJoinExecTemp (59) + +- SortMergeJoin Inner (58) + :- Sort (50) + : +- Project (49) + : +- ShuffledHashJoinExecTemp (48) + : +- SortMergeJoin Inner (47) + : :- Sort (41) + : : +- Exchange (40) + : : +- Filter (39) + : : +- Scan parquet (38) + : +- Sort (46) + : +- Exchange (45) + : +- Project (44) + : +- Filter (43) + : +- Scan parquet (42) + +- Sort (57) + +- Filter (56) + +- HashAggregate (55) + +- Exchange (54) + +- HashAggregate (53) + +- Filter (52) + +- Scan parquet (51) (1) Scan parquet @@ -234,92 +240,116 @@ Condition : (isnotnull(l_partkey#X) AND isnotnull(l_quantity#X)) Input [3]: [l_partkey#X, l_quantity#X, l_extendedprice#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(41) Scan parquet +(41) Sort +Input [3]: [l_partkey#X, l_quantity#X, l_extendedprice#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(42) Scan parquet Output [3]: [p_partkey#X, p_brand#X, p_container#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_brand), IsNotNull(p_container), EqualTo(p_brand,Brand#X), EqualTo(p_container,MED BOX), IsNotNull(p_partkey)] ReadSchema: struct -(42) Filter +(43) Filter Input [3]: [p_partkey#X, p_brand#X, p_container#X] Condition : ((((isnotnull(p_brand#X) AND isnotnull(p_container#X)) AND (p_brand#X = Brand#X)) AND (p_container#X = MED BOX)) AND isnotnull(p_partkey#X)) -(43) Project +(44) Project Output [1]: [p_partkey#X] Input [3]: [p_partkey#X, p_brand#X, p_container#X] -(44) Exchange +(45) Exchange Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(45) ShuffledHashJoin +(46) Sort +Input [1]: [p_partkey#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(47) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join type: Inner Join condition: None -(46) Project +(48) ShuffledHashJoinExecTemp +Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, p_partkey#X] +Arguments: BuildRight + +(49) Project Output [3]: [l_quantity#X, l_extendedprice#X, p_partkey#X] Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, p_partkey#X] -(47) Scan parquet +(50) Sort +Input [3]: [l_quantity#X, l_extendedprice#X, p_partkey#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(51) Scan parquet Output [2]: [l_partkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_partkey)] ReadSchema: struct -(48) Filter +(52) Filter Input [2]: [l_partkey#X, l_quantity#X] Condition : isnotnull(l_partkey#X) -(49) HashAggregate +(53) HashAggregate Input [2]: [l_partkey#X, l_quantity#X] Keys [1]: [l_partkey#X] Functions [1]: [partial_avg(l_quantity#X)] Aggregate Attributes [2]: [sum#X, count#X] Results [3]: [l_partkey#X, sum#X, count#X] -(50) Exchange +(54) Exchange Input [3]: [l_partkey#X, sum#X, count#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(51) HashAggregate +(55) HashAggregate Input [3]: [l_partkey#X, sum#X, count#X] Keys [1]: [l_partkey#X] Functions [1]: [avg(l_quantity#X)] Aggregate Attributes [1]: [avg(l_quantity#X)#X] Results [2]: [(0.2 * avg(l_quantity#X)#X) AS (0.2 * avg(l_quantity))#X, l_partkey#X] -(52) Filter +(56) Filter Input [2]: [(0.2 * avg(l_quantity))#X, l_partkey#X] Condition : isnotnull((0.2 * avg(l_quantity))#X) -(53) ShuffledHashJoin +(57) Sort +Input [2]: [(0.2 * avg(l_quantity))#X, l_partkey#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(58) SortMergeJoin Left keys [1]: [p_partkey#X] Right keys [1]: [l_partkey#X] Join type: Inner Join condition: (cast(l_quantity#X as decimal(18,7)) < (0.2 * avg(l_quantity))#X) -(54) Project +(59) ShuffledHashJoinExecTemp +Input [5]: [l_quantity#X, l_extendedprice#X, p_partkey#X, (0.2 * avg(l_quantity))#X, l_partkey#X] +Arguments: BuildRight + +(60) Project Output [1]: [l_extendedprice#X] Input [5]: [l_quantity#X, l_extendedprice#X, p_partkey#X, (0.2 * avg(l_quantity))#X, l_partkey#X] -(55) HashAggregate +(61) HashAggregate Input [1]: [l_extendedprice#X] Keys: [] Functions [1]: [partial_sum(l_extendedprice#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [2]: [sum#X, isEmpty#X] -(56) HashAggregate +(62) HashAggregate Input [2]: [sum#X, isEmpty#X] Keys: [] Functions [1]: [sum(l_extendedprice#X)] Aggregate Attributes [1]: [sum(l_extendedprice#X)#X] Results [1]: [(sum(l_extendedprice#X)#X / 7.0) AS avg_yearly#X] -(57) AdaptiveSparkPlan +(63) AdaptiveSparkPlan Output [1]: [avg_yearly#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/18.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/18.txt index 5a6f04064349..38e8e066593b 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/18.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/18.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (97) +AdaptiveSparkPlan (109) +- == Final Plan == VeloxColumnarToRowExec (64) +- TakeOrderedAndProjectExecTransformer (63) @@ -52,38 +52,50 @@ AdaptiveSparkPlan (97) +- ShuffleQueryStage (51), Statistics(X) +- ReusedExchange (50) +- == Initial Plan == - TakeOrderedAndProject (96) - +- HashAggregate (95) - +- HashAggregate (94) - +- Project (93) - +- ShuffledHashJoin Inner BuildRight (92) - :- Exchange (81) - : +- Project (80) - : +- ShuffledHashJoin Inner BuildLeft (79) - : :- Exchange (67) - : : +- Filter (66) - : : +- Scan parquet (65) - : +- Exchange (78) - : +- ShuffledHashJoin LeftSemi BuildRight (77) - : :- Exchange (70) - : : +- Filter (69) - : : +- Scan parquet (68) - : +- Project (76) - : +- Filter (75) - : +- HashAggregate (74) - : +- Exchange (73) - : +- HashAggregate (72) - : +- Scan parquet (71) - +- ShuffledHashJoin LeftSemi BuildRight (91) - :- Exchange (84) - : +- Filter (83) - : +- Scan parquet (82) - +- Project (90) - +- Filter (89) - +- HashAggregate (88) - +- Exchange (87) - +- HashAggregate (86) - +- Scan parquet (85) + TakeOrderedAndProject (108) + +- HashAggregate (107) + +- HashAggregate (106) + +- Project (105) + +- ShuffledHashJoinExecTemp (104) + +- SortMergeJoin Inner (103) + :- Sort (88) + : +- Exchange (87) + : +- Project (86) + : +- ShuffledHashJoinExecTemp (85) + : +- SortMergeJoin Inner (84) + : :- Sort (68) + : : +- Exchange (67) + : : +- Filter (66) + : : +- Scan parquet (65) + : +- Sort (83) + : +- Exchange (82) + : +- ShuffledHashJoinExecTemp (81) + : +- SortMergeJoin LeftSemi (80) + : :- Sort (72) + : : +- Exchange (71) + : : +- Filter (70) + : : +- Scan parquet (69) + : +- Sort (79) + : +- Project (78) + : +- Filter (77) + : +- HashAggregate (76) + : +- Exchange (75) + : +- HashAggregate (74) + : +- Scan parquet (73) + +- Sort (102) + +- ShuffledHashJoinExecTemp (101) + +- SortMergeJoin LeftSemi (100) + :- Sort (92) + : +- Exchange (91) + : +- Filter (90) + : +- Scan parquet (89) + +- Sort (99) + +- Project (98) + +- Filter (97) + +- HashAggregate (96) + +- Exchange (95) + +- HashAggregate (94) + +- Scan parquet (93) (1) Scan parquet @@ -375,158 +387,206 @@ Condition : isnotnull(c_custkey#X) Input [2]: [c_custkey#X, c_name#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(68) Scan parquet +(68) Sort +Input [2]: [c_custkey#X, c_name#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(69) Scan parquet Output [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_custkey), IsNotNull(o_orderkey)] ReadSchema: struct -(69) Filter +(70) Filter Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Condition : (isnotnull(o_custkey#X) AND isnotnull(o_orderkey#X)) -(70) Exchange +(71) Exchange Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(71) Scan parquet +(72) Sort +Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(73) Scan parquet Output [2]: [l_orderkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(72) HashAggregate +(74) HashAggregate Input [2]: [l_orderkey#X, l_quantity#X] Keys [1]: [l_orderkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_orderkey#X, sum#X, isEmpty#X] -(73) Exchange +(75) Exchange Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(74) HashAggregate +(76) HashAggregate Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Keys [1]: [l_orderkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [2]: [l_orderkey#X, sum(l_quantity#X)#X AS sum(l_quantity#X)#X] -(75) Filter +(77) Filter Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] Condition : (isnotnull(sum(l_quantity#X)#X) AND (sum(l_quantity#X)#X > 300.00)) -(76) Project +(78) Project Output [1]: [l_orderkey#X] Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] -(77) ShuffledHashJoin +(79) Sort +Input [1]: [l_orderkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(80) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join type: LeftSemi Join condition: None -(78) Exchange +(81) ShuffledHashJoinExecTemp +Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] +Arguments: BuildRight + +(82) Exchange Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(79) ShuffledHashJoin +(83) Sort +Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(84) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join type: Inner Join condition: None -(80) Project +(85) ShuffledHashJoinExecTemp +Input [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] +Arguments: BuildLeft + +(86) Project Output [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] Input [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] -(81) Exchange +(87) Exchange Input [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(82) Scan parquet +(88) Sort +Input [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(89) Scan parquet Output [2]: [l_orderkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_orderkey)] ReadSchema: struct -(83) Filter +(90) Filter Input [2]: [l_orderkey#X, l_quantity#X] Condition : isnotnull(l_orderkey#X) -(84) Exchange +(91) Exchange Input [2]: [l_orderkey#X, l_quantity#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(85) Scan parquet +(92) Sort +Input [2]: [l_orderkey#X, l_quantity#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(93) Scan parquet Output [2]: [l_orderkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(86) HashAggregate +(94) HashAggregate Input [2]: [l_orderkey#X, l_quantity#X] Keys [1]: [l_orderkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_orderkey#X, sum#X, isEmpty#X] -(87) Exchange +(95) Exchange Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(88) HashAggregate +(96) HashAggregate Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Keys [1]: [l_orderkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [2]: [l_orderkey#X, sum(l_quantity#X)#X AS sum(l_quantity#X)#X] -(89) Filter +(97) Filter Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] Condition : (isnotnull(sum(l_quantity#X)#X) AND (sum(l_quantity#X)#X > 300.00)) -(90) Project +(98) Project Output [1]: [l_orderkey#X] Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] -(91) ShuffledHashJoin +(99) Sort +Input [1]: [l_orderkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(100) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [l_orderkey#X] Join type: LeftSemi Join condition: None -(92) ShuffledHashJoin +(101) ShuffledHashJoinExecTemp +Input [2]: [l_orderkey#X, l_quantity#X] +Arguments: BuildRight + +(102) Sort +Input [2]: [l_orderkey#X, l_quantity#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(103) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join type: Inner Join condition: None -(93) Project +(104) ShuffledHashJoinExecTemp +Input [7]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_orderkey#X, l_quantity#X] +Arguments: BuildRight + +(105) Project Output [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_quantity#X] Input [7]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_orderkey#X, l_quantity#X] -(94) HashAggregate +(106) HashAggregate Input [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_quantity#X] Keys [5]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [7]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] -(95) HashAggregate +(107) HashAggregate Input [7]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] Keys [5]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity#X)#X AS sum(l_quantity)#X] -(96) TakeOrderedAndProject +(108) TakeOrderedAndProject Input [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] Arguments: X, [o_totalprice#X DESC NULLS LAST, o_orderdate#X ASC NULLS FIRST], [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] -(97) AdaptiveSparkPlan +(109) AdaptiveSparkPlan Output [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/19.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/19.txt index 2f7bb7995dd9..e3edca717e84 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/19.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/19.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (34) +AdaptiveSparkPlan (37) +- == Final Plan == VeloxColumnarToRowExec (22) +- ^ RegularHashAggregateExecTransformer (20) @@ -19,17 +19,20 @@ AdaptiveSparkPlan (34) +- ^ FilterExecTransformer (10) +- ^ Scan parquet (9) +- == Initial Plan == - HashAggregate (33) - +- HashAggregate (32) - +- Project (31) - +- ShuffledHashJoin Inner BuildRight (30) - :- Exchange (26) - : +- Project (25) - : +- Filter (24) - : +- Scan parquet (23) - +- Exchange (29) - +- Filter (28) - +- Scan parquet (27) + HashAggregate (36) + +- HashAggregate (35) + +- Project (34) + +- ShuffledHashJoinExecTemp (33) + +- SortMergeJoin Inner (32) + :- Sort (27) + : +- Exchange (26) + : +- Project (25) + : +- Filter (24) + : +- Scan parquet (23) + +- Sort (31) + +- Exchange (30) + +- Filter (29) + +- Scan parquet (28) (1) Scan parquet @@ -148,45 +151,57 @@ Input [6]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, l_shipin Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(27) Scan parquet +(27) Sort +Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(28) Scan parquet Output [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_size), GreaterThanOrEqual(p_size,1), IsNotNull(p_partkey), Or(Or(And(And(EqualTo(p_brand,Brand#X),In(p_container, [SM BOX,SM CASE,SM PACK,SM PKG])),LessThanOrEqual(p_size,5)),And(And(EqualTo(p_brand,Brand#X),In(p_container, [MED BAG,MED BOX,MED PACK,MED PKG])),LessThanOrEqual(p_size,10))),And(And(EqualTo(p_brand,Brand#X),In(p_container, [LG BOX,LG CASE,LG PACK,LG PKG])),LessThanOrEqual(p_size,15)))] ReadSchema: struct -(28) Filter +(29) Filter Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Condition : (((isnotnull(p_size#X) AND (p_size#X >= 1)) AND isnotnull(p_partkey#X)) AND (((((p_brand#X = Brand#X) AND p_container#X IN (SM CASE,SM BOX,SM PACK,SM PKG)) AND (p_size#X <= 5)) OR (((p_brand#X = Brand#X) AND p_container#X IN (MED BAG,MED BOX,MED PKG,MED PACK)) AND (p_size#X <= 10))) OR (((p_brand#X = Brand#X) AND p_container#X IN (LG CASE,LG BOX,LG PACK,LG PKG)) AND (p_size#X <= 15)))) -(29) Exchange +(30) Exchange Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(30) ShuffledHashJoin +(31) Sort +Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(32) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join type: Inner Join condition: (((((((p_brand#X = Brand#X) AND p_container#X IN (SM CASE,SM BOX,SM PACK,SM PKG)) AND (l_quantity#X >= 1.00)) AND (l_quantity#X <= 11.00)) AND (p_size#X <= 5)) OR (((((p_brand#X = Brand#X) AND p_container#X IN (MED BAG,MED BOX,MED PKG,MED PACK)) AND (l_quantity#X >= 10.00)) AND (l_quantity#X <= 20.00)) AND (p_size#X <= 10))) OR (((((p_brand#X = Brand#X) AND p_container#X IN (LG CASE,LG BOX,LG PACK,LG PKG)) AND (l_quantity#X >= 20.00)) AND (l_quantity#X <= 30.00)) AND (p_size#X <= 15))) -(31) Project +(33) ShuffledHashJoinExecTemp +Input [8]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, p_partkey#X, p_brand#X, p_size#X, p_container#X] +Arguments: BuildRight + +(34) Project Output [2]: [l_extendedprice#X, l_discount#X] Input [8]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, p_partkey#X, p_brand#X, p_size#X, p_container#X] -(32) HashAggregate +(35) HashAggregate Input [2]: [l_extendedprice#X, l_discount#X] Keys: [] Functions [1]: [partial_sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [2]: [sum#X, isEmpty#X] -(33) HashAggregate +(36) HashAggregate Input [2]: [sum#X, isEmpty#X] Keys: [] Functions [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X] Results [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X AS revenue#X] -(34) AdaptiveSparkPlan +(37) AdaptiveSparkPlan Output [1]: [revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/20.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/20.txt index e8aa97a29e7a..60cdc5da4cba 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/20.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/20.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (123) +AdaptiveSparkPlan (138) +- == Final Plan == VeloxColumnarToRowExec (83) +- AQEShuffleRead (82) @@ -65,45 +65,60 @@ AdaptiveSparkPlan (123) +- ^ FilterExecTransformer (70) +- ^ Scan parquet (69) +- == Initial Plan == - Sort (122) - +- Exchange (121) - +- Project (120) - +- ShuffledHashJoin Inner BuildRight (119) - :- Exchange (114) - : +- Project (113) - : +- ShuffledHashJoin LeftSemi BuildRight (112) - : :- Exchange (86) - : : +- Filter (85) - : : +- Scan parquet (84) - : +- Exchange (111) - : +- Project (110) - : +- ShuffledHashJoin Inner BuildLeft (109) - : :- Exchange (95) - : : +- ShuffledHashJoin LeftSemi BuildRight (94) - : : :- Exchange (89) - : : : +- Filter (88) - : : : +- Scan parquet (87) - : : +- Exchange (93) - : : +- Project (92) - : : +- Filter (91) - : : +- Scan parquet (90) - : +- Exchange (108) - : +- Filter (107) - : +- HashAggregate (106) - : +- HashAggregate (105) - : +- ShuffledHashJoin LeftSemi BuildRight (104) - : :- Exchange (99) - : : +- Project (98) - : : +- Filter (97) - : : +- Scan parquet (96) - : +- Exchange (103) - : +- Project (102) - : +- Filter (101) - : +- Scan parquet (100) - +- Exchange (118) - +- Project (117) - +- Filter (116) - +- Scan parquet (115) + Sort (137) + +- Exchange (136) + +- Project (135) + +- ShuffledHashJoinExecTemp (134) + +- SortMergeJoin Inner (133) + :- Sort (127) + : +- Exchange (126) + : +- Project (125) + : +- ShuffledHashJoinExecTemp (124) + : +- SortMergeJoin LeftSemi (123) + : :- Sort (87) + : : +- Exchange (86) + : : +- Filter (85) + : : +- Scan parquet (84) + : +- Sort (122) + : +- Exchange (121) + : +- Project (120) + : +- ShuffledHashJoinExecTemp (119) + : +- SortMergeJoin Inner (118) + : :- Sort (100) + : : +- Exchange (99) + : : +- ShuffledHashJoinExecTemp (98) + : : +- SortMergeJoin LeftSemi (97) + : : :- Sort (91) + : : : +- Exchange (90) + : : : +- Filter (89) + : : : +- Scan parquet (88) + : : +- Sort (96) + : : +- Exchange (95) + : : +- Project (94) + : : +- Filter (93) + : : +- Scan parquet (92) + : +- Sort (117) + : +- Exchange (116) + : +- Filter (115) + : +- HashAggregate (114) + : +- HashAggregate (113) + : +- ShuffledHashJoinExecTemp (112) + : +- SortMergeJoin LeftSemi (111) + : :- Sort (105) + : : +- Exchange (104) + : : +- Project (103) + : : +- Filter (102) + : : +- Scan parquet (101) + : +- Sort (110) + : +- Exchange (109) + : +- Project (108) + : +- Filter (107) + : +- Scan parquet (106) + +- Sort (132) + +- Exchange (131) + +- Project (130) + +- Filter (129) + +- Scan parquet (128) (1) Scan parquet @@ -462,181 +477,241 @@ Condition : isnotnull(s_nationkey#X) Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(87) Scan parquet +(87) Sort +Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(88) Scan parquet Output [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(ps_availqty), IsNotNull(ps_partkey), IsNotNull(ps_suppkey)] ReadSchema: struct -(88) Filter +(89) Filter Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Condition : ((isnotnull(ps_availqty#X) AND isnotnull(ps_partkey#X)) AND isnotnull(ps_suppkey#X)) -(89) Exchange +(90) Exchange Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Arguments: hashpartitioning(ps_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(90) Scan parquet +(91) Sort +Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] +Arguments: [ps_partkey#X ASC NULLS FIRST], false, 0 + +(92) Scan parquet Output [2]: [p_partkey#X, p_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_name), StringStartsWith(p_name,forest)] ReadSchema: struct -(91) Filter +(93) Filter Input [2]: [p_partkey#X, p_name#X] Condition : (isnotnull(p_name#X) AND StartsWith(p_name#X, forest)) -(92) Project +(94) Project Output [1]: [p_partkey#X] Input [2]: [p_partkey#X, p_name#X] -(93) Exchange +(95) Exchange Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(94) ShuffledHashJoin +(96) Sort +Input [1]: [p_partkey#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(97) SortMergeJoin Left keys [1]: [ps_partkey#X] Right keys [1]: [p_partkey#X] Join type: LeftSemi Join condition: None -(95) Exchange +(98) ShuffledHashJoinExecTemp +Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] +Arguments: BuildRight + +(99) Exchange Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Arguments: hashpartitioning(ps_partkey#X, ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(96) Scan parquet +(100) Sort +Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] +Arguments: [ps_partkey#X ASC NULLS FIRST, ps_suppkey#X ASC NULLS FIRST], false, 0 + +(101) Scan parquet Output [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1994-01-01), LessThan(l_shipdate,1995-01-01), IsNotNull(l_partkey), IsNotNull(l_suppkey)] ReadSchema: struct -(97) Filter +(102) Filter Input [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] Condition : ((((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1994-01-01)) AND (l_shipdate#X < 1995-01-01)) AND isnotnull(l_partkey#X)) AND isnotnull(l_suppkey#X)) -(98) Project +(103) Project Output [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Input [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] -(99) Exchange +(104) Exchange Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(100) Scan parquet +(105) Sort +Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(106) Scan parquet Output [2]: [p_partkey#X, p_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_name), StringStartsWith(p_name,forest)] ReadSchema: struct -(101) Filter +(107) Filter Input [2]: [p_partkey#X, p_name#X] Condition : (isnotnull(p_name#X) AND StartsWith(p_name#X, forest)) -(102) Project +(108) Project Output [1]: [p_partkey#X] Input [2]: [p_partkey#X, p_name#X] -(103) Exchange +(109) Exchange Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(104) ShuffledHashJoin +(110) Sort +Input [1]: [p_partkey#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(111) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join type: LeftSemi Join condition: None -(105) HashAggregate +(112) ShuffledHashJoinExecTemp +Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] +Arguments: BuildRight + +(113) HashAggregate Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Keys [2]: [l_partkey#X, l_suppkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] -(106) HashAggregate +(114) HashAggregate Input [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] Keys [2]: [l_partkey#X, l_suppkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [3]: [(0.5 * sum(l_quantity#X)#X) AS (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(107) Filter +(115) Filter Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Condition : isnotnull((0.5 * sum(l_quantity))#X) -(108) Exchange +(116) Exchange Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Arguments: hashpartitioning(l_partkey#X, l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(109) ShuffledHashJoin +(117) Sort +Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] +Arguments: [l_partkey#X ASC NULLS FIRST, l_suppkey#X ASC NULLS FIRST], false, 0 + +(118) SortMergeJoin Left keys [2]: [ps_partkey#X, ps_suppkey#X] Right keys [2]: [l_partkey#X, l_suppkey#X] Join type: Inner Join condition: (cast(ps_availqty#X as decimal(24,3)) > (0.5 * sum(l_quantity))#X) -(110) Project +(119) ShuffledHashJoinExecTemp +Input [6]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] +Arguments: BuildLeft + +(120) Project Output [1]: [ps_suppkey#X] Input [6]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(111) Exchange +(121) Exchange Input [1]: [ps_suppkey#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(112) ShuffledHashJoin +(122) Sort +Input [1]: [ps_suppkey#X] +Arguments: [ps_suppkey#X ASC NULLS FIRST], false, 0 + +(123) SortMergeJoin Left keys [1]: [s_suppkey#X] Right keys [1]: [ps_suppkey#X] Join type: LeftSemi Join condition: None -(113) Project +(124) ShuffledHashJoinExecTemp +Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] +Arguments: BuildRight + +(125) Project Output [3]: [s_name#X, s_address#X, s_nationkey#X] Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] -(114) Exchange +(126) Exchange Input [3]: [s_name#X, s_address#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(115) Scan parquet +(127) Sort +Input [3]: [s_name#X, s_address#X, s_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(128) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,CANADA), IsNotNull(n_nationkey)] ReadSchema: struct -(116) Filter +(129) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = CANADA)) AND isnotnull(n_nationkey#X)) -(117) Project +(130) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(118) Exchange +(131) Exchange Input [1]: [n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(119) ShuffledHashJoin +(132) Sort +Input [1]: [n_nationkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(133) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join type: Inner Join condition: None -(120) Project +(134) ShuffledHashJoinExecTemp +Input [4]: [s_name#X, s_address#X, s_nationkey#X, n_nationkey#X] +Arguments: BuildRight + +(135) Project Output [2]: [s_name#X, s_address#X] Input [4]: [s_name#X, s_address#X, s_nationkey#X, n_nationkey#X] -(121) Exchange +(136) Exchange Input [2]: [s_name#X, s_address#X] Arguments: rangepartitioning(s_name#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(122) Sort +(137) Sort Input [2]: [s_name#X, s_address#X] Arguments: [s_name#X ASC NULLS FIRST], true, 0 -(123) AdaptiveSparkPlan +(138) AdaptiveSparkPlan Output [2]: [s_name#X, s_address#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/21.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/21.txt index 323db548d24e..907c5f9009f0 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/21.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/21.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (118) +AdaptiveSparkPlan (133) +- == Final Plan == VeloxColumnarToRowExec (81) +- ^ RegularHashAggregateExecTransformer (79) @@ -62,42 +62,57 @@ AdaptiveSparkPlan (118) +- ^ FilterExecTransformer (63) +- ^ Scan parquet (62) +- == Initial Plan == - TakeOrderedAndProject (117) - +- HashAggregate (116) - +- Exchange (115) - +- HashAggregate (114) - +- Project (113) - +- ShuffledHashJoin Inner BuildRight (112) - :- Exchange (107) - : +- Project (106) - : +- ShuffledHashJoin Inner BuildRight (105) - : :- Exchange (100) - : : +- Project (99) - : : +- ShuffledHashJoin Inner BuildLeft (98) - : : :- Exchange (84) - : : : +- Filter (83) - : : : +- Scan parquet (82) - : : +- Exchange (97) - : : +- ShuffledHashJoin LeftAnti BuildRight (96) - : : :- ShuffledHashJoin LeftSemi BuildRight (91) - : : : :- Exchange (88) - : : : : +- Project (87) - : : : : +- Filter (86) - : : : : +- Scan parquet (85) - : : : +- Exchange (90) - : : : +- Scan parquet (89) - : : +- Exchange (95) - : : +- Project (94) - : : +- Filter (93) - : : +- Scan parquet (92) - : +- Exchange (104) - : +- Project (103) - : +- Filter (102) - : +- Scan parquet (101) - +- Exchange (111) - +- Project (110) - +- Filter (109) - +- Scan parquet (108) + TakeOrderedAndProject (132) + +- HashAggregate (131) + +- Exchange (130) + +- HashAggregate (129) + +- Project (128) + +- ShuffledHashJoinExecTemp (127) + +- SortMergeJoin Inner (126) + :- Sort (120) + : +- Exchange (119) + : +- Project (118) + : +- ShuffledHashJoinExecTemp (117) + : +- SortMergeJoin Inner (116) + : :- Sort (110) + : : +- Exchange (109) + : : +- Project (108) + : : +- ShuffledHashJoinExecTemp (107) + : : +- SortMergeJoin Inner (106) + : : :- Sort (85) + : : : +- Exchange (84) + : : : +- Filter (83) + : : : +- Scan parquet (82) + : : +- Sort (105) + : : +- Exchange (104) + : : +- ShuffledHashJoinExecTemp (103) + : : +- SortMergeJoin LeftAnti (102) + : : :- Sort (96) + : : : +- ShuffledHashJoinExecTemp (95) + : : : +- SortMergeJoin LeftSemi (94) + : : : :- Sort (90) + : : : : +- Exchange (89) + : : : : +- Project (88) + : : : : +- Filter (87) + : : : : +- Scan parquet (86) + : : : +- Sort (93) + : : : +- Exchange (92) + : : : +- Scan parquet (91) + : : +- Sort (101) + : : +- Exchange (100) + : : +- Project (99) + : : +- Filter (98) + : : +- Scan parquet (97) + : +- Sort (115) + : +- Exchange (114) + : +- Project (113) + : +- Filter (112) + : +- Scan parquet (111) + +- Sort (125) + +- Exchange (124) + +- Project (123) + +- Filter (122) + +- Scan parquet (121) (1) Scan parquet @@ -451,168 +466,228 @@ Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) Input [3]: [s_suppkey#X, s_name#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(85) Scan parquet +(85) Sort +Input [3]: [s_suppkey#X, s_name#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(86) Scan parquet Output [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_receiptdate), IsNotNull(l_commitdate), IsNotNull(l_suppkey), IsNotNull(l_orderkey)] ReadSchema: struct -(86) Filter +(87) Filter Input [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] Condition : ((((isnotnull(l_receiptdate#X) AND isnotnull(l_commitdate#X)) AND (l_receiptdate#X > l_commitdate#X)) AND isnotnull(l_suppkey#X)) AND isnotnull(l_orderkey#X)) -(87) Project +(88) Project Output [2]: [l_orderkey#X, l_suppkey#X] Input [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] -(88) Exchange +(89) Exchange Input [2]: [l_orderkey#X, l_suppkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(89) Scan parquet +(90) Sort +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(91) Scan parquet Output [2]: [l_orderkey#X, l_suppkey#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(90) Exchange +(92) Exchange Input [2]: [l_orderkey#X, l_suppkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(91) ShuffledHashJoin +(93) Sort +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(94) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [l_orderkey#X] Join type: LeftSemi Join condition: NOT (l_suppkey#X = l_suppkey#X) -(92) Scan parquet +(95) ShuffledHashJoinExecTemp +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: BuildRight + +(96) Sort +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(97) Scan parquet Output [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_receiptdate), IsNotNull(l_commitdate)] ReadSchema: struct -(93) Filter +(98) Filter Input [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] Condition : ((isnotnull(l_receiptdate#X) AND isnotnull(l_commitdate#X)) AND (l_receiptdate#X > l_commitdate#X)) -(94) Project +(99) Project Output [2]: [l_orderkey#X, l_suppkey#X] Input [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] -(95) Exchange +(100) Exchange Input [2]: [l_orderkey#X, l_suppkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(96) ShuffledHashJoin +(101) Sort +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(102) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [l_orderkey#X] Join type: LeftAnti Join condition: NOT (l_suppkey#X = l_suppkey#X) -(97) Exchange +(103) ShuffledHashJoinExecTemp +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: BuildRight + +(104) Exchange Input [2]: [l_orderkey#X, l_suppkey#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(98) ShuffledHashJoin +(105) Sort +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: [l_suppkey#X ASC NULLS FIRST], false, 0 + +(106) SortMergeJoin Left keys [1]: [s_suppkey#X] Right keys [1]: [l_suppkey#X] Join type: Inner Join condition: None -(99) Project +(107) ShuffledHashJoinExecTemp +Input [5]: [s_suppkey#X, s_name#X, s_nationkey#X, l_orderkey#X, l_suppkey#X] +Arguments: BuildLeft + +(108) Project Output [3]: [s_name#X, s_nationkey#X, l_orderkey#X] Input [5]: [s_suppkey#X, s_name#X, s_nationkey#X, l_orderkey#X, l_suppkey#X] -(100) Exchange +(109) Exchange Input [3]: [s_name#X, s_nationkey#X, l_orderkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(101) Scan parquet +(110) Sort +Input [3]: [s_name#X, s_nationkey#X, l_orderkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(111) Scan parquet Output [2]: [o_orderkey#X, o_orderstatus#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderstatus), EqualTo(o_orderstatus,F), IsNotNull(o_orderkey)] ReadSchema: struct -(102) Filter +(112) Filter Input [2]: [o_orderkey#X, o_orderstatus#X] Condition : ((isnotnull(o_orderstatus#X) AND (o_orderstatus#X = F)) AND isnotnull(o_orderkey#X)) -(103) Project +(113) Project Output [1]: [o_orderkey#X] Input [2]: [o_orderkey#X, o_orderstatus#X] -(104) Exchange +(114) Exchange Input [1]: [o_orderkey#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(105) ShuffledHashJoin +(115) Sort +Input [1]: [o_orderkey#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(116) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [o_orderkey#X] Join type: Inner Join condition: None -(106) Project +(117) ShuffledHashJoinExecTemp +Input [4]: [s_name#X, s_nationkey#X, l_orderkey#X, o_orderkey#X] +Arguments: BuildRight + +(118) Project Output [2]: [s_name#X, s_nationkey#X] Input [4]: [s_name#X, s_nationkey#X, l_orderkey#X, o_orderkey#X] -(107) Exchange +(119) Exchange Input [2]: [s_name#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(108) Scan parquet +(120) Sort +Input [2]: [s_name#X, s_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(121) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,SAUDI ARABIA), IsNotNull(n_nationkey)] ReadSchema: struct -(109) Filter +(122) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = SAUDI ARABIA)) AND isnotnull(n_nationkey#X)) -(110) Project +(123) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(111) Exchange +(124) Exchange Input [1]: [n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(112) ShuffledHashJoin +(125) Sort +Input [1]: [n_nationkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(126) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join type: Inner Join condition: None -(113) Project +(127) ShuffledHashJoinExecTemp +Input [3]: [s_name#X, s_nationkey#X, n_nationkey#X] +Arguments: BuildRight + +(128) Project Output [1]: [s_name#X] Input [3]: [s_name#X, s_nationkey#X, n_nationkey#X] -(114) HashAggregate +(129) HashAggregate Input [1]: [s_name#X] Keys [1]: [s_name#X] Functions [1]: [partial_count(1)] Aggregate Attributes [1]: [count#X] Results [2]: [s_name#X, count#X] -(115) Exchange +(130) Exchange Input [2]: [s_name#X, count#X] Arguments: hashpartitioning(s_name#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(116) HashAggregate +(131) HashAggregate Input [2]: [s_name#X, count#X] Keys [1]: [s_name#X] Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#X] Results [2]: [s_name#X, count(1)#X AS numwait#X] -(117) TakeOrderedAndProject +(132) TakeOrderedAndProject Input [2]: [s_name#X, numwait#X] Arguments: X, [numwait#X DESC NULLS LAST, s_name#X ASC NULLS FIRST], [s_name#X, numwait#X] -(118) AdaptiveSparkPlan +(133) AdaptiveSparkPlan Output [2]: [s_name#X, numwait#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/22.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/22.txt index add978887b0a..76435db8e9e3 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/22.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/22.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (46) +AdaptiveSparkPlan (49) +- == Final Plan == VeloxColumnarToRowExec (33) +- ^ SortExecTransformer (31) @@ -26,18 +26,21 @@ AdaptiveSparkPlan (46) +- ^ ProjectExecTransformer (10) +- ^ Scan parquet (9) +- == Initial Plan == - Sort (45) - +- Exchange (44) - +- HashAggregate (43) - +- Exchange (42) - +- HashAggregate (41) - +- Project (40) - +- ShuffledHashJoin LeftAnti BuildRight (39) - :- Exchange (36) - : +- Filter (35) - : +- Scan parquet (34) - +- Exchange (38) - +- Scan parquet (37) + Sort (48) + +- Exchange (47) + +- HashAggregate (46) + +- Exchange (45) + +- HashAggregate (44) + +- Project (43) + +- ShuffledHashJoinExecTemp (42) + +- SortMergeJoin LeftAnti (41) + :- Sort (37) + : +- Exchange (36) + : +- Filter (35) + : +- Scan parquet (34) + +- Sort (40) + +- Exchange (39) + +- Scan parquet (38) (1) Scan parquet @@ -191,186 +194,198 @@ Condition : ((isnotnull(c_acctbal#X) AND substring(c_phone#X, 1, 2) IN (13,31,23 Input [3]: [c_custkey#X, c_phone#X, c_acctbal#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(37) Scan parquet +(37) Sort +Input [3]: [c_custkey#X, c_phone#X, c_acctbal#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(38) Scan parquet Output [1]: [o_custkey#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(38) Exchange +(39) Exchange Input [1]: [o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(39) ShuffledHashJoin +(40) Sort +Input [1]: [o_custkey#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(41) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join type: LeftAnti Join condition: None -(40) Project +(42) ShuffledHashJoinExecTemp +Input [3]: [c_custkey#X, c_phone#X, c_acctbal#X] +Arguments: BuildRight + +(43) Project Output [2]: [substring(c_phone#X, 1, 2) AS cntrycode#X, c_acctbal#X] Input [3]: [c_custkey#X, c_phone#X, c_acctbal#X] -(41) HashAggregate +(44) HashAggregate Input [2]: [cntrycode#X, c_acctbal#X] Keys [1]: [cntrycode#X] Functions [2]: [partial_count(1), partial_sum(c_acctbal#X)] Aggregate Attributes [3]: [count#X, sum#X, isEmpty#X] Results [4]: [cntrycode#X, count#X, sum#X, isEmpty#X] -(42) Exchange +(45) Exchange Input [4]: [cntrycode#X, count#X, sum#X, isEmpty#X] Arguments: hashpartitioning(cntrycode#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(43) HashAggregate +(46) HashAggregate Input [4]: [cntrycode#X, count#X, sum#X, isEmpty#X] Keys [1]: [cntrycode#X] Functions [2]: [count(1), sum(c_acctbal#X)] Aggregate Attributes [2]: [count(1)#X, sum(c_acctbal#X)#X] Results [3]: [cntrycode#X, count(1)#X AS numcust#X, sum(c_acctbal#X)#X AS totacctbal#X] -(44) Exchange +(47) Exchange Input [3]: [cntrycode#X, numcust#X, totacctbal#X] Arguments: rangepartitioning(cntrycode#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(45) Sort +(48) Sort Input [3]: [cntrycode#X, numcust#X, totacctbal#X] Arguments: [cntrycode#X ASC NULLS FIRST], true, 0 -(46) AdaptiveSparkPlan +(49) AdaptiveSparkPlan Output [3]: [cntrycode#X, numcust#X, totacctbal#X] Arguments: isFinalPlan=true ===== Subqueries ===== Subquery:1 Hosting operator id = 2 Hosting Expression = Subquery subquery#X, [id=#X] -AdaptiveSparkPlan (65) +AdaptiveSparkPlan (68) +- == Final Plan == - VeloxColumnarToRowExec (58) - +- ^ RegularHashAggregateExecTransformer (56) - +- ^ InputIteratorTransformer (55) - +- ShuffleQueryStage (53), Statistics(X) - +- ColumnarExchange (52) - +- ^ FlushableHashAggregateExecTransformer (50) - +- ^ ProjectExecTransformer (49) - +- ^ FilterExecTransformer (48) - +- ^ Scan parquet (47) + VeloxColumnarToRowExec (61) + +- ^ RegularHashAggregateExecTransformer (59) + +- ^ InputIteratorTransformer (58) + +- ShuffleQueryStage (56), Statistics(X) + +- ColumnarExchange (55) + +- ^ FlushableHashAggregateExecTransformer (53) + +- ^ ProjectExecTransformer (52) + +- ^ FilterExecTransformer (51) + +- ^ Scan parquet (50) +- == Initial Plan == - HashAggregate (64) - +- Exchange (63) - +- HashAggregate (62) - +- Project (61) - +- Filter (60) - +- Scan parquet (59) + HashAggregate (67) + +- Exchange (66) + +- HashAggregate (65) + +- Project (64) + +- Filter (63) + +- Scan parquet (62) -(47) Scan parquet +(50) Scan parquet Output [2]: [c_phone#X, c_acctbal#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(c_acctbal), GreaterThan(c_acctbal,0.00)] ReadSchema: struct -(48) FilterExecTransformer +(51) FilterExecTransformer Input [2]: [c_phone#X, c_acctbal#X] Arguments: ((isnotnull(c_acctbal#X) AND (c_acctbal#X > 0.00)) AND substring(c_phone#X, 1, 2) IN (13,31,23,29,30,18,17)) -(49) ProjectExecTransformer +(52) ProjectExecTransformer Output [1]: [c_acctbal#X] Input [2]: [c_phone#X, c_acctbal#X] -(50) FlushableHashAggregateExecTransformer +(53) FlushableHashAggregateExecTransformer Input [1]: [c_acctbal#X] Keys: [] Functions [1]: [partial_avg(c_acctbal#X)] Aggregate Attributes [2]: [sum#X, count#X] Results [2]: [sum#X, count#X] -(51) WholeStageCodegenTransformer (X) +(54) WholeStageCodegenTransformer (X) Input [2]: [sum#X, count#X] Arguments: false -(52) ColumnarExchange +(55) ColumnarExchange Input [2]: [sum#X, count#X] Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=X], [id=#X] -(53) ShuffleQueryStage +(56) ShuffleQueryStage Output [2]: [sum#X, count#X] Arguments: X -(54) InputAdapter +(57) InputAdapter Input [2]: [sum#X, count#X] -(55) InputIteratorTransformer +(58) InputIteratorTransformer Input [2]: [sum#X, count#X] -(56) RegularHashAggregateExecTransformer +(59) RegularHashAggregateExecTransformer Input [2]: [sum#X, count#X] Keys: [] Functions [1]: [avg(c_acctbal#X)] Aggregate Attributes [1]: [avg(c_acctbal#X)#X] Results [1]: [avg(c_acctbal#X)#X AS avg(c_acctbal)#X] -(57) WholeStageCodegenTransformer (X) +(60) WholeStageCodegenTransformer (X) Input [1]: [avg(c_acctbal)#X] Arguments: false -(58) VeloxColumnarToRowExec +(61) VeloxColumnarToRowExec Input [1]: [avg(c_acctbal)#X] -(59) Scan parquet +(62) Scan parquet Output [2]: [c_phone#X, c_acctbal#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(c_acctbal), GreaterThan(c_acctbal,0.00)] ReadSchema: struct -(60) Filter +(63) Filter Input [2]: [c_phone#X, c_acctbal#X] Condition : ((isnotnull(c_acctbal#X) AND (c_acctbal#X > 0.00)) AND substring(c_phone#X, 1, 2) IN (13,31,23,29,30,18,17)) -(61) Project +(64) Project Output [1]: [c_acctbal#X] Input [2]: [c_phone#X, c_acctbal#X] -(62) HashAggregate +(65) HashAggregate Input [1]: [c_acctbal#X] Keys: [] Functions [1]: [partial_avg(c_acctbal#X)] Aggregate Attributes [2]: [sum#X, count#X] Results [2]: [sum#X, count#X] -(63) Exchange +(66) Exchange Input [2]: [sum#X, count#X] Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=X] -(64) HashAggregate +(67) HashAggregate Input [2]: [sum#X, count#X] Keys: [] Functions [1]: [avg(c_acctbal#X)] Aggregate Attributes [1]: [avg(c_acctbal#X)#X] Results [1]: [avg(c_acctbal#X)#X AS avg(c_acctbal)#X] -(65) AdaptiveSparkPlan +(68) AdaptiveSparkPlan Output [1]: [avg(c_acctbal)#X] Arguments: isFinalPlan=true Subquery:2 Hosting operator id = 1 Hosting Expression = Subquery subquery#X, [id=#X] -AdaptiveSparkPlan (65) +AdaptiveSparkPlan (68) +- == Final Plan == - VeloxColumnarToRowExec (58) - +- ^ RegularHashAggregateExecTransformer (56) - +- ^ InputIteratorTransformer (55) - +- ShuffleQueryStage (53), Statistics(X) - +- ColumnarExchange (52) - +- ^ FlushableHashAggregateExecTransformer (50) - +- ^ ProjectExecTransformer (49) - +- ^ FilterExecTransformer (48) - +- ^ Scan parquet (47) + VeloxColumnarToRowExec (61) + +- ^ RegularHashAggregateExecTransformer (59) + +- ^ InputIteratorTransformer (58) + +- ShuffleQueryStage (56), Statistics(X) + +- ColumnarExchange (55) + +- ^ FlushableHashAggregateExecTransformer (53) + +- ^ ProjectExecTransformer (52) + +- ^ FilterExecTransformer (51) + +- ^ Scan parquet (50) +- == Initial Plan == - HashAggregate (64) - +- Exchange (63) - +- HashAggregate (62) - +- Project (61) - +- Filter (60) - +- Scan parquet (59) \ No newline at end of file + HashAggregate (67) + +- Exchange (66) + +- HashAggregate (65) + +- Project (64) + +- Filter (63) + +- Scan parquet (62) \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/3.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/3.txt index 69dc65c58e21..8fa7b37373f5 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/3.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/3.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (59) +AdaptiveSparkPlan (65) +- == Final Plan == VeloxColumnarToRowExec (39) +- TakeOrderedAndProjectExecTransformer (38) @@ -32,25 +32,31 @@ AdaptiveSparkPlan (59) +- ^ FilterExecTransformer (25) +- ^ Scan parquet (24) +- == Initial Plan == - TakeOrderedAndProject (58) - +- HashAggregate (57) - +- HashAggregate (56) - +- Project (55) - +- ShuffledHashJoin Inner BuildRight (54) - :- Exchange (49) - : +- Project (48) - : +- ShuffledHashJoin Inner BuildLeft (47) - : :- Exchange (43) - : : +- Project (42) - : : +- Filter (41) - : : +- Scan parquet (40) - : +- Exchange (46) - : +- Filter (45) - : +- Scan parquet (44) - +- Exchange (53) - +- Project (52) - +- Filter (51) - +- Scan parquet (50) + TakeOrderedAndProject (64) + +- HashAggregate (63) + +- HashAggregate (62) + +- Project (61) + +- ShuffledHashJoinExecTemp (60) + +- SortMergeJoin Inner (59) + :- Sort (53) + : +- Exchange (52) + : +- Project (51) + : +- ShuffledHashJoinExecTemp (50) + : +- SortMergeJoin Inner (49) + : :- Sort (44) + : : +- Exchange (43) + : : +- Project (42) + : : +- Filter (41) + : : +- Scan parquet (40) + : +- Sort (48) + : +- Exchange (47) + : +- Filter (46) + : +- Scan parquet (45) + +- Sort (58) + +- Exchange (57) + +- Project (56) + +- Filter (55) + +- Scan parquet (54) (1) Scan parquet @@ -238,82 +244,106 @@ Input [2]: [c_custkey#X, c_mktsegment#X] Input [1]: [c_custkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(44) Scan parquet +(44) Sort +Input [1]: [c_custkey#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(45) Scan parquet Output [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderdate), LessThan(o_orderdate,1995-03-15), IsNotNull(o_custkey), IsNotNull(o_orderkey)] ReadSchema: struct -(45) Filter +(46) Filter Input [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Condition : (((isnotnull(o_orderdate#X) AND (o_orderdate#X < 1995-03-15)) AND isnotnull(o_custkey#X)) AND isnotnull(o_orderkey#X)) -(46) Exchange +(47) Exchange Input [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(47) ShuffledHashJoin +(48) Sort +Input [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(49) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join type: Inner Join condition: None -(48) Project +(50) ShuffledHashJoinExecTemp +Input [5]: [c_custkey#X, o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] +Arguments: BuildLeft + +(51) Project Output [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] Input [5]: [c_custkey#X, o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] -(49) Exchange +(52) Exchange Input [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(50) Scan parquet +(53) Sort +Input [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(54) Scan parquet Output [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThan(l_shipdate,1995-03-15), IsNotNull(l_orderkey)] ReadSchema: struct -(51) Filter +(55) Filter Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : ((isnotnull(l_shipdate#X) AND (l_shipdate#X > 1995-03-15)) AND isnotnull(l_orderkey#X)) -(52) Project +(56) Project Output [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(53) Exchange +(57) Exchange Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(54) ShuffledHashJoin +(58) Sort +Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(59) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join type: Inner Join condition: None -(55) Project +(60) ShuffledHashJoinExecTemp +Input [6]: [o_orderkey#X, o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] +Arguments: BuildRight + +(61) Project Output [5]: [o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] Input [6]: [o_orderkey#X, o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] -(56) HashAggregate +(62) HashAggregate Input [5]: [o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] Keys [3]: [l_orderkey#X, o_orderdate#X, o_shippriority#X] Functions [1]: [partial_sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [5]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] -(57) HashAggregate +(63) HashAggregate Input [5]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] Keys [3]: [l_orderkey#X, o_orderdate#X, o_shippriority#X] Functions [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X] Results [4]: [l_orderkey#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X AS revenue#X, o_orderdate#X, o_shippriority#X] -(58) TakeOrderedAndProject +(64) TakeOrderedAndProject Input [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] Arguments: X, [revenue#X DESC NULLS LAST, o_orderdate#X ASC NULLS FIRST], [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] -(59) AdaptiveSparkPlan +(65) AdaptiveSparkPlan Output [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/4.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/4.txt index d7f42dd7b351..871da231e9fc 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/4.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/4.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (50) +AdaptiveSparkPlan (53) +- == Final Plan == VeloxColumnarToRowExec (34) +- ^ SortExecTransformer (32) @@ -27,21 +27,24 @@ AdaptiveSparkPlan (50) +- ^ FilterExecTransformer (10) +- ^ Scan parquet (9) +- == Initial Plan == - Sort (49) - +- Exchange (48) - +- HashAggregate (47) - +- Exchange (46) - +- HashAggregate (45) - +- Project (44) - +- ShuffledHashJoin LeftSemi BuildRight (43) - :- Exchange (38) - : +- Project (37) - : +- Filter (36) - : +- Scan parquet (35) - +- Exchange (42) - +- Project (41) - +- Filter (40) - +- Scan parquet (39) + Sort (52) + +- Exchange (51) + +- HashAggregate (50) + +- Exchange (49) + +- HashAggregate (48) + +- Project (47) + +- ShuffledHashJoinExecTemp (46) + +- SortMergeJoin LeftSemi (45) + :- Sort (39) + : +- Exchange (38) + : +- Project (37) + : +- Filter (36) + : +- Scan parquet (35) + +- Sort (44) + +- Exchange (43) + +- Project (42) + +- Filter (41) + +- Scan parquet (40) (1) Scan parquet @@ -204,61 +207,73 @@ Input [3]: [o_orderkey#X, o_orderdate#X, o_orderpriority#X] Input [2]: [o_orderkey#X, o_orderpriority#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(39) Scan parquet +(39) Sort +Input [2]: [o_orderkey#X, o_orderpriority#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(40) Scan parquet Output [3]: [l_orderkey#X, l_commitdate#X, l_receiptdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_commitdate), IsNotNull(l_receiptdate)] ReadSchema: struct -(40) Filter +(41) Filter Input [3]: [l_orderkey#X, l_commitdate#X, l_receiptdate#X] Condition : ((isnotnull(l_commitdate#X) AND isnotnull(l_receiptdate#X)) AND (l_commitdate#X < l_receiptdate#X)) -(41) Project +(42) Project Output [1]: [l_orderkey#X] Input [3]: [l_orderkey#X, l_commitdate#X, l_receiptdate#X] -(42) Exchange +(43) Exchange Input [1]: [l_orderkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(43) ShuffledHashJoin +(44) Sort +Input [1]: [l_orderkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(45) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join type: LeftSemi Join condition: None -(44) Project +(46) ShuffledHashJoinExecTemp +Input [2]: [o_orderkey#X, o_orderpriority#X] +Arguments: BuildRight + +(47) Project Output [1]: [o_orderpriority#X] Input [2]: [o_orderkey#X, o_orderpriority#X] -(45) HashAggregate +(48) HashAggregate Input [1]: [o_orderpriority#X] Keys [1]: [o_orderpriority#X] Functions [1]: [partial_count(1)] Aggregate Attributes [1]: [count#X] Results [2]: [o_orderpriority#X, count#X] -(46) Exchange +(49) Exchange Input [2]: [o_orderpriority#X, count#X] Arguments: hashpartitioning(o_orderpriority#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(47) HashAggregate +(50) HashAggregate Input [2]: [o_orderpriority#X, count#X] Keys [1]: [o_orderpriority#X] Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#X] Results [2]: [o_orderpriority#X, count(1)#X AS order_count#X] -(48) Exchange +(51) Exchange Input [2]: [o_orderpriority#X, order_count#X] Arguments: rangepartitioning(o_orderpriority#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(49) Sort +(52) Sort Input [2]: [o_orderpriority#X, order_count#X] Arguments: [o_orderpriority#X ASC NULLS FIRST], true, 0 -(50) AdaptiveSparkPlan +(53) AdaptiveSparkPlan Output [2]: [o_orderpriority#X, order_count#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/5.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/5.txt index 129b02c9548e..981831be90e1 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/5.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/5.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (134) +AdaptiveSparkPlan (149) +- == Final Plan == VeloxColumnarToRowExec (94) +- ^ SortExecTransformer (92) @@ -71,45 +71,60 @@ AdaptiveSparkPlan (134) +- ^ FilterExecTransformer (70) +- ^ Scan parquet (69) +- == Initial Plan == - Sort (133) - +- Exchange (132) - +- HashAggregate (131) - +- Exchange (130) - +- HashAggregate (129) - +- Project (128) - +- ShuffledHashJoin Inner BuildRight (127) - :- Exchange (122) - : +- Project (121) - : +- ShuffledHashJoin Inner BuildRight (120) - : :- Exchange (116) - : : +- Project (115) - : : +- ShuffledHashJoin Inner BuildRight (114) - : : :- Exchange (110) - : : : +- Project (109) - : : : +- ShuffledHashJoin Inner BuildRight (108) - : : : :- Exchange (104) - : : : : +- Project (103) - : : : : +- ShuffledHashJoin Inner BuildLeft (102) - : : : : :- Exchange (97) - : : : : : +- Filter (96) - : : : : : +- Scan parquet (95) - : : : : +- Exchange (101) - : : : : +- Project (100) - : : : : +- Filter (99) - : : : : +- Scan parquet (98) - : : : +- Exchange (107) - : : : +- Filter (106) - : : : +- Scan parquet (105) - : : +- Exchange (113) - : : +- Filter (112) - : : +- Scan parquet (111) - : +- Exchange (119) - : +- Filter (118) - : +- Scan parquet (117) - +- Exchange (126) - +- Project (125) - +- Filter (124) - +- Scan parquet (123) + Sort (148) + +- Exchange (147) + +- HashAggregate (146) + +- Exchange (145) + +- HashAggregate (144) + +- Project (143) + +- ShuffledHashJoinExecTemp (142) + +- SortMergeJoin Inner (141) + :- Sort (135) + : +- Exchange (134) + : +- Project (133) + : +- ShuffledHashJoinExecTemp (132) + : +- SortMergeJoin Inner (131) + : :- Sort (126) + : : +- Exchange (125) + : : +- Project (124) + : : +- ShuffledHashJoinExecTemp (123) + : : +- SortMergeJoin Inner (122) + : : :- Sort (117) + : : : +- Exchange (116) + : : : +- Project (115) + : : : +- ShuffledHashJoinExecTemp (114) + : : : +- SortMergeJoin Inner (113) + : : : :- Sort (108) + : : : : +- Exchange (107) + : : : : +- Project (106) + : : : : +- ShuffledHashJoinExecTemp (105) + : : : : +- SortMergeJoin Inner (104) + : : : : :- Sort (98) + : : : : : +- Exchange (97) + : : : : : +- Filter (96) + : : : : : +- Scan parquet (95) + : : : : +- Sort (103) + : : : : +- Exchange (102) + : : : : +- Project (101) + : : : : +- Filter (100) + : : : : +- Scan parquet (99) + : : : +- Sort (112) + : : : +- Exchange (111) + : : : +- Filter (110) + : : : +- Scan parquet (109) + : : +- Sort (121) + : : +- Exchange (120) + : : +- Filter (119) + : : +- Scan parquet (118) + : +- Sort (130) + : +- Exchange (129) + : +- Filter (128) + : +- Scan parquet (127) + +- Sort (140) + +- Exchange (139) + +- Project (138) + +- Filter (137) + +- Scan parquet (136) (1) Scan parquet @@ -512,181 +527,241 @@ Condition : (isnotnull(c_custkey#X) AND isnotnull(c_nationkey#X)) Input [2]: [c_custkey#X, c_nationkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(98) Scan parquet +(98) Sort +Input [2]: [c_custkey#X, c_nationkey#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(99) Scan parquet Output [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderdate), GreaterThanOrEqual(o_orderdate,1994-01-01), LessThan(o_orderdate,1995-01-01), IsNotNull(o_custkey), IsNotNull(o_orderkey)] ReadSchema: struct -(99) Filter +(100) Filter Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Condition : ((((isnotnull(o_orderdate#X) AND (o_orderdate#X >= 1994-01-01)) AND (o_orderdate#X < 1995-01-01)) AND isnotnull(o_custkey#X)) AND isnotnull(o_orderkey#X)) -(100) Project +(101) Project Output [2]: [o_orderkey#X, o_custkey#X] Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] -(101) Exchange +(102) Exchange Input [2]: [o_orderkey#X, o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(102) ShuffledHashJoin +(103) Sort +Input [2]: [o_orderkey#X, o_custkey#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(104) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join type: Inner Join condition: None -(103) Project +(105) ShuffledHashJoinExecTemp +Input [4]: [c_custkey#X, c_nationkey#X, o_orderkey#X, o_custkey#X] +Arguments: BuildLeft + +(106) Project Output [2]: [c_nationkey#X, o_orderkey#X] Input [4]: [c_custkey#X, c_nationkey#X, o_orderkey#X, o_custkey#X] -(104) Exchange +(107) Exchange Input [2]: [c_nationkey#X, o_orderkey#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(105) Scan parquet +(108) Sort +Input [2]: [c_nationkey#X, o_orderkey#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(109) Scan parquet Output [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_orderkey), IsNotNull(l_suppkey)] ReadSchema: struct -(106) Filter +(110) Filter Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Condition : (isnotnull(l_orderkey#X) AND isnotnull(l_suppkey#X)) -(107) Exchange +(111) Exchange Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(108) ShuffledHashJoin +(112) Sort +Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(113) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join type: Inner Join condition: None -(109) Project +(114) ShuffledHashJoinExecTemp +Input [6]: [c_nationkey#X, o_orderkey#X, l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: BuildRight + +(115) Project Output [4]: [c_nationkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Input [6]: [c_nationkey#X, o_orderkey#X, l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] -(110) Exchange +(116) Exchange Input [4]: [c_nationkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_suppkey#X, c_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(111) Scan parquet +(117) Sort +Input [4]: [c_nationkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_suppkey#X ASC NULLS FIRST, c_nationkey#X ASC NULLS FIRST], false, 0 + +(118) Scan parquet Output [2]: [s_suppkey#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct -(112) Filter +(119) Filter Input [2]: [s_suppkey#X, s_nationkey#X] Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) -(113) Exchange +(120) Exchange Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(114) ShuffledHashJoin +(121) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST, s_nationkey#X ASC NULLS FIRST], false, 0 + +(122) SortMergeJoin Left keys [2]: [l_suppkey#X, c_nationkey#X] Right keys [2]: [s_suppkey#X, s_nationkey#X] Join type: Inner Join condition: None -(115) Project +(123) ShuffledHashJoinExecTemp +Input [6]: [c_nationkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, s_suppkey#X, s_nationkey#X] +Arguments: BuildRight + +(124) Project Output [3]: [l_extendedprice#X, l_discount#X, s_nationkey#X] Input [6]: [c_nationkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, s_suppkey#X, s_nationkey#X] -(116) Exchange +(125) Exchange Input [3]: [l_extendedprice#X, l_discount#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(117) Scan parquet +(126) Sort +Input [3]: [l_extendedprice#X, l_discount#X, s_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(127) Scan parquet Output [3]: [n_nationkey#X, n_name#X, n_regionkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey), IsNotNull(n_regionkey)] ReadSchema: struct -(118) Filter +(128) Filter Input [3]: [n_nationkey#X, n_name#X, n_regionkey#X] Condition : (isnotnull(n_nationkey#X) AND isnotnull(n_regionkey#X)) -(119) Exchange +(129) Exchange Input [3]: [n_nationkey#X, n_name#X, n_regionkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(120) ShuffledHashJoin +(130) Sort +Input [3]: [n_nationkey#X, n_name#X, n_regionkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(131) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join type: Inner Join condition: None -(121) Project +(132) ShuffledHashJoinExecTemp +Input [6]: [l_extendedprice#X, l_discount#X, s_nationkey#X, n_nationkey#X, n_name#X, n_regionkey#X] +Arguments: BuildRight + +(133) Project Output [4]: [l_extendedprice#X, l_discount#X, n_name#X, n_regionkey#X] Input [6]: [l_extendedprice#X, l_discount#X, s_nationkey#X, n_nationkey#X, n_name#X, n_regionkey#X] -(122) Exchange +(134) Exchange Input [4]: [l_extendedprice#X, l_discount#X, n_name#X, n_regionkey#X] Arguments: hashpartitioning(n_regionkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(123) Scan parquet +(135) Sort +Input [4]: [l_extendedprice#X, l_discount#X, n_name#X, n_regionkey#X] +Arguments: [n_regionkey#X ASC NULLS FIRST], false, 0 + +(136) Scan parquet Output [2]: [r_regionkey#X, r_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(r_name), EqualTo(r_name,ASIA), IsNotNull(r_regionkey)] ReadSchema: struct -(124) Filter +(137) Filter Input [2]: [r_regionkey#X, r_name#X] Condition : ((isnotnull(r_name#X) AND (r_name#X = ASIA)) AND isnotnull(r_regionkey#X)) -(125) Project +(138) Project Output [1]: [r_regionkey#X] Input [2]: [r_regionkey#X, r_name#X] -(126) Exchange +(139) Exchange Input [1]: [r_regionkey#X] Arguments: hashpartitioning(r_regionkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(127) ShuffledHashJoin +(140) Sort +Input [1]: [r_regionkey#X] +Arguments: [r_regionkey#X ASC NULLS FIRST], false, 0 + +(141) SortMergeJoin Left keys [1]: [n_regionkey#X] Right keys [1]: [r_regionkey#X] Join type: Inner Join condition: None -(128) Project +(142) ShuffledHashJoinExecTemp +Input [5]: [l_extendedprice#X, l_discount#X, n_name#X, n_regionkey#X, r_regionkey#X] +Arguments: BuildRight + +(143) Project Output [3]: [l_extendedprice#X, l_discount#X, n_name#X] Input [5]: [l_extendedprice#X, l_discount#X, n_name#X, n_regionkey#X, r_regionkey#X] -(129) HashAggregate +(144) HashAggregate Input [3]: [l_extendedprice#X, l_discount#X, n_name#X] Keys [1]: [n_name#X] Functions [1]: [partial_sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [n_name#X, sum#X, isEmpty#X] -(130) Exchange +(145) Exchange Input [3]: [n_name#X, sum#X, isEmpty#X] Arguments: hashpartitioning(n_name#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(131) HashAggregate +(146) HashAggregate Input [3]: [n_name#X, sum#X, isEmpty#X] Keys [1]: [n_name#X] Functions [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X] Results [2]: [n_name#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X AS revenue#X] -(132) Exchange +(147) Exchange Input [2]: [n_name#X, revenue#X] Arguments: rangepartitioning(revenue#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(133) Sort +(148) Sort Input [2]: [n_name#X, revenue#X] Arguments: [revenue#X DESC NULLS LAST], true, 0 -(134) AdaptiveSparkPlan +(149) AdaptiveSparkPlan Output [2]: [n_name#X, revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/7.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/7.txt index 1e6af0683a39..46cc4919d6fa 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/7.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/7.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (128) +AdaptiveSparkPlan (143) +- == Final Plan == VeloxColumnarToRowExec (90) +- ^ SortExecTransformer (88) @@ -68,43 +68,58 @@ AdaptiveSparkPlan (128) +- ShuffleQueryStage (70), Statistics(X) +- ReusedExchange (69) +- == Initial Plan == - Sort (127) - +- Exchange (126) - +- HashAggregate (125) - +- Exchange (124) - +- HashAggregate (123) - +- Project (122) - +- ShuffledHashJoin Inner BuildRight (121) - :- Exchange (117) - : +- Project (116) - : +- ShuffledHashJoin Inner BuildRight (115) - : :- Exchange (111) - : : +- Project (110) - : : +- ShuffledHashJoin Inner BuildRight (109) - : : :- Exchange (105) - : : : +- Project (104) - : : : +- ShuffledHashJoin Inner BuildRight (103) - : : : :- Exchange (99) - : : : : +- Project (98) - : : : : +- ShuffledHashJoin Inner BuildLeft (97) - : : : : :- Exchange (93) - : : : : : +- Filter (92) - : : : : : +- Scan parquet (91) - : : : : +- Exchange (96) - : : : : +- Filter (95) - : : : : +- Scan parquet (94) - : : : +- Exchange (102) - : : : +- Filter (101) - : : : +- Scan parquet (100) - : : +- Exchange (108) - : : +- Filter (107) - : : +- Scan parquet (106) - : +- Exchange (114) - : +- Filter (113) - : +- Scan parquet (112) - +- Exchange (120) - +- Filter (119) - +- Scan parquet (118) + Sort (142) + +- Exchange (141) + +- HashAggregate (140) + +- Exchange (139) + +- HashAggregate (138) + +- Project (137) + +- ShuffledHashJoinExecTemp (136) + +- SortMergeJoin Inner (135) + :- Sort (130) + : +- Exchange (129) + : +- Project (128) + : +- ShuffledHashJoinExecTemp (127) + : +- SortMergeJoin Inner (126) + : :- Sort (121) + : : +- Exchange (120) + : : +- Project (119) + : : +- ShuffledHashJoinExecTemp (118) + : : +- SortMergeJoin Inner (117) + : : :- Sort (112) + : : : +- Exchange (111) + : : : +- Project (110) + : : : +- ShuffledHashJoinExecTemp (109) + : : : +- SortMergeJoin Inner (108) + : : : :- Sort (103) + : : : : +- Exchange (102) + : : : : +- Project (101) + : : : : +- ShuffledHashJoinExecTemp (100) + : : : : +- SortMergeJoin Inner (99) + : : : : :- Sort (94) + : : : : : +- Exchange (93) + : : : : : +- Filter (92) + : : : : : +- Scan parquet (91) + : : : : +- Sort (98) + : : : : +- Exchange (97) + : : : : +- Filter (96) + : : : : +- Scan parquet (95) + : : : +- Sort (107) + : : : +- Exchange (106) + : : : +- Filter (105) + : : : +- Scan parquet (104) + : : +- Sort (116) + : : +- Exchange (115) + : : +- Filter (114) + : : +- Scan parquet (113) + : +- Sort (125) + : +- Exchange (124) + : +- Filter (123) + : +- Scan parquet (122) + +- Sort (134) + +- Exchange (133) + +- Filter (132) + +- Scan parquet (131) (1) Scan parquet @@ -487,173 +502,233 @@ Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(94) Scan parquet +(94) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(95) Scan parquet Output [5]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1995-01-01), LessThanOrEqual(l_shipdate,1996-12-31), IsNotNull(l_suppkey), IsNotNull(l_orderkey)] ReadSchema: struct -(95) Filter +(96) Filter Input [5]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : ((((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1995-01-01)) AND (l_shipdate#X <= 1996-12-31)) AND isnotnull(l_suppkey#X)) AND isnotnull(l_orderkey#X)) -(96) Exchange +(97) Exchange Input [5]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(97) ShuffledHashJoin +(98) Sort +Input [5]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Arguments: [l_suppkey#X ASC NULLS FIRST], false, 0 + +(99) SortMergeJoin Left keys [1]: [s_suppkey#X] Right keys [1]: [l_suppkey#X] Join type: Inner Join condition: None -(98) Project +(100) ShuffledHashJoinExecTemp +Input [7]: [s_suppkey#X, s_nationkey#X, l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Arguments: BuildLeft + +(101) Project Output [5]: [s_nationkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Input [7]: [s_suppkey#X, s_nationkey#X, l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(99) Exchange +(102) Exchange Input [5]: [s_nationkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(100) Scan parquet +(103) Sort +Input [5]: [s_nationkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(104) Scan parquet Output [2]: [o_orderkey#X, o_custkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderkey), IsNotNull(o_custkey)] ReadSchema: struct -(101) Filter +(105) Filter Input [2]: [o_orderkey#X, o_custkey#X] Condition : (isnotnull(o_orderkey#X) AND isnotnull(o_custkey#X)) -(102) Exchange +(106) Exchange Input [2]: [o_orderkey#X, o_custkey#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(103) ShuffledHashJoin +(107) Sort +Input [2]: [o_orderkey#X, o_custkey#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(108) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [o_orderkey#X] Join type: Inner Join condition: None -(104) Project +(109) ShuffledHashJoinExecTemp +Input [7]: [s_nationkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_orderkey#X, o_custkey#X] +Arguments: BuildRight + +(110) Project Output [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_custkey#X] Input [7]: [s_nationkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_orderkey#X, o_custkey#X] -(105) Exchange +(111) Exchange Input [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(106) Scan parquet +(112) Sort +Input [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_custkey#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(113) Scan parquet Output [2]: [c_custkey#X, c_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(c_custkey), IsNotNull(c_nationkey)] ReadSchema: struct -(107) Filter +(114) Filter Input [2]: [c_custkey#X, c_nationkey#X] Condition : (isnotnull(c_custkey#X) AND isnotnull(c_nationkey#X)) -(108) Exchange +(115) Exchange Input [2]: [c_custkey#X, c_nationkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(109) ShuffledHashJoin +(116) Sort +Input [2]: [c_custkey#X, c_nationkey#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(117) SortMergeJoin Left keys [1]: [o_custkey#X] Right keys [1]: [c_custkey#X] Join type: Inner Join condition: None -(110) Project +(118) ShuffledHashJoinExecTemp +Input [7]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_custkey#X, c_custkey#X, c_nationkey#X] +Arguments: BuildRight + +(119) Project Output [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X] Input [7]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_custkey#X, c_custkey#X, c_nationkey#X] -(111) Exchange +(120) Exchange Input [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(112) Scan parquet +(121) Sort +Input [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(122) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey), Or(EqualTo(n_name,FRANCE),EqualTo(n_name,GERMANY))] ReadSchema: struct -(113) Filter +(123) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : (isnotnull(n_nationkey#X) AND ((n_name#X = FRANCE) OR (n_name#X = GERMANY))) -(114) Exchange +(124) Exchange Input [2]: [n_nationkey#X, n_name#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(115) ShuffledHashJoin +(125) Sort +Input [2]: [n_nationkey#X, n_name#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(126) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join type: Inner Join condition: None -(116) Project +(127) ShuffledHashJoinExecTemp +Input [7]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_nationkey#X, n_name#X] +Arguments: BuildRight + +(128) Project Output [5]: [l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_name#X] Input [7]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_nationkey#X, n_name#X] -(117) Exchange +(129) Exchange Input [5]: [l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_name#X] Arguments: hashpartitioning(c_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(118) Scan parquet +(130) Sort +Input [5]: [l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_name#X] +Arguments: [c_nationkey#X ASC NULLS FIRST], false, 0 + +(131) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey), Or(EqualTo(n_name,GERMANY),EqualTo(n_name,FRANCE))] ReadSchema: struct -(119) Filter +(132) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : (isnotnull(n_nationkey#X) AND ((n_name#X = GERMANY) OR (n_name#X = FRANCE))) -(120) Exchange +(133) Exchange Input [2]: [n_nationkey#X, n_name#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(121) ShuffledHashJoin +(134) Sort +Input [2]: [n_nationkey#X, n_name#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(135) SortMergeJoin Left keys [1]: [c_nationkey#X] Right keys [1]: [n_nationkey#X] Join type: Inner Join condition: (((n_name#X = FRANCE) AND (n_name#X = GERMANY)) OR ((n_name#X = GERMANY) AND (n_name#X = FRANCE))) -(122) Project +(136) ShuffledHashJoinExecTemp +Input [7]: [l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_name#X, n_nationkey#X, n_name#X] +Arguments: BuildRight + +(137) Project Output [4]: [n_name#X AS supp_nation#X, n_name#X AS cust_nation#X, year(l_shipdate#X) AS l_year#X, (l_extendedprice#X * (1 - l_discount#X)) AS volume#X] Input [7]: [l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_name#X, n_nationkey#X, n_name#X] -(123) HashAggregate +(138) HashAggregate Input [4]: [supp_nation#X, cust_nation#X, l_year#X, volume#X] Keys [3]: [supp_nation#X, cust_nation#X, l_year#X] Functions [1]: [partial_sum(volume#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [5]: [supp_nation#X, cust_nation#X, l_year#X, sum#X, isEmpty#X] -(124) Exchange +(139) Exchange Input [5]: [supp_nation#X, cust_nation#X, l_year#X, sum#X, isEmpty#X] Arguments: hashpartitioning(supp_nation#X, cust_nation#X, l_year#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(125) HashAggregate +(140) HashAggregate Input [5]: [supp_nation#X, cust_nation#X, l_year#X, sum#X, isEmpty#X] Keys [3]: [supp_nation#X, cust_nation#X, l_year#X] Functions [1]: [sum(volume#X)] Aggregate Attributes [1]: [sum(volume#X)#X] Results [4]: [supp_nation#X, cust_nation#X, l_year#X, sum(volume#X)#X AS revenue#X] -(126) Exchange +(141) Exchange Input [4]: [supp_nation#X, cust_nation#X, l_year#X, revenue#X] Arguments: rangepartitioning(supp_nation#X ASC NULLS FIRST, cust_nation#X ASC NULLS FIRST, l_year#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(127) Sort +(142) Sort Input [4]: [supp_nation#X, cust_nation#X, l_year#X, revenue#X] Arguments: [supp_nation#X ASC NULLS FIRST, cust_nation#X ASC NULLS FIRST, l_year#X ASC NULLS FIRST], true, 0 -(128) AdaptiveSparkPlan +(143) AdaptiveSparkPlan Output [4]: [supp_nation#X, cust_nation#X, l_year#X, revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/8.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/8.txt index cfc03953fe18..df70d5254fc2 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/8.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/8.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (177) +AdaptiveSparkPlan (198) +- == Final Plan == VeloxColumnarToRowExec (125) +- ^ SortExecTransformer (123) @@ -94,57 +94,78 @@ AdaptiveSparkPlan (177) +- ^ FilterExecTransformer (100) +- ^ Scan parquet (99) +- == Initial Plan == - Sort (176) - +- Exchange (175) - +- HashAggregate (174) - +- Exchange (173) - +- HashAggregate (172) - +- Project (171) - +- ShuffledHashJoin Inner BuildRight (170) - :- Exchange (165) - : +- Project (164) - : +- ShuffledHashJoin Inner BuildRight (163) - : :- Exchange (159) - : : +- Project (158) - : : +- ShuffledHashJoin Inner BuildRight (157) - : : :- Exchange (153) - : : : +- Project (152) - : : : +- ShuffledHashJoin Inner BuildRight (151) - : : : :- Exchange (147) - : : : : +- Project (146) - : : : : +- ShuffledHashJoin Inner BuildRight (145) - : : : : :- Exchange (141) - : : : : : +- Project (140) - : : : : : +- ShuffledHashJoin Inner BuildRight (139) - : : : : : :- Exchange (135) - : : : : : : +- Project (134) - : : : : : : +- ShuffledHashJoin Inner BuildLeft (133) - : : : : : : :- Exchange (129) - : : : : : : : +- Project (128) - : : : : : : : +- Filter (127) - : : : : : : : +- Scan parquet (126) - : : : : : : +- Exchange (132) - : : : : : : +- Filter (131) - : : : : : : +- Scan parquet (130) - : : : : : +- Exchange (138) - : : : : : +- Filter (137) - : : : : : +- Scan parquet (136) - : : : : +- Exchange (144) - : : : : +- Filter (143) - : : : : +- Scan parquet (142) - : : : +- Exchange (150) - : : : +- Filter (149) - : : : +- Scan parquet (148) - : : +- Exchange (156) - : : +- Filter (155) - : : +- Scan parquet (154) - : +- Exchange (162) - : +- Filter (161) - : +- Scan parquet (160) - +- Exchange (169) - +- Project (168) - +- Filter (167) - +- Scan parquet (166) + Sort (197) + +- Exchange (196) + +- HashAggregate (195) + +- Exchange (194) + +- HashAggregate (193) + +- Project (192) + +- ShuffledHashJoinExecTemp (191) + +- SortMergeJoin Inner (190) + :- Sort (184) + : +- Exchange (183) + : +- Project (182) + : +- ShuffledHashJoinExecTemp (181) + : +- SortMergeJoin Inner (180) + : :- Sort (175) + : : +- Exchange (174) + : : +- Project (173) + : : +- ShuffledHashJoinExecTemp (172) + : : +- SortMergeJoin Inner (171) + : : :- Sort (166) + : : : +- Exchange (165) + : : : +- Project (164) + : : : +- ShuffledHashJoinExecTemp (163) + : : : +- SortMergeJoin Inner (162) + : : : :- Sort (157) + : : : : +- Exchange (156) + : : : : +- Project (155) + : : : : +- ShuffledHashJoinExecTemp (154) + : : : : +- SortMergeJoin Inner (153) + : : : : :- Sort (148) + : : : : : +- Exchange (147) + : : : : : +- Project (146) + : : : : : +- ShuffledHashJoinExecTemp (145) + : : : : : +- SortMergeJoin Inner (144) + : : : : : :- Sort (139) + : : : : : : +- Exchange (138) + : : : : : : +- Project (137) + : : : : : : +- ShuffledHashJoinExecTemp (136) + : : : : : : +- SortMergeJoin Inner (135) + : : : : : : :- Sort (130) + : : : : : : : +- Exchange (129) + : : : : : : : +- Project (128) + : : : : : : : +- Filter (127) + : : : : : : : +- Scan parquet (126) + : : : : : : +- Sort (134) + : : : : : : +- Exchange (133) + : : : : : : +- Filter (132) + : : : : : : +- Scan parquet (131) + : : : : : +- Sort (143) + : : : : : +- Exchange (142) + : : : : : +- Filter (141) + : : : : : +- Scan parquet (140) + : : : : +- Sort (152) + : : : : +- Exchange (151) + : : : : +- Filter (150) + : : : : +- Scan parquet (149) + : : : +- Sort (161) + : : : +- Exchange (160) + : : : +- Filter (159) + : : : +- Scan parquet (158) + : : +- Sort (170) + : : +- Exchange (169) + : : +- Filter (168) + : : +- Scan parquet (167) + : +- Sort (179) + : +- Exchange (178) + : +- Filter (177) + : +- Scan parquet (176) + +- Sort (189) + +- Exchange (188) + +- Project (187) + +- Filter (186) + +- Scan parquet (185) (1) Scan parquet @@ -677,235 +698,319 @@ Input [2]: [p_partkey#X, p_type#X] Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(130) Scan parquet +(130) Sort +Input [1]: [p_partkey#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(131) Scan parquet Output [5]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_partkey), IsNotNull(l_suppkey), IsNotNull(l_orderkey)] ReadSchema: struct -(131) Filter +(132) Filter Input [5]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Condition : ((isnotnull(l_partkey#X) AND isnotnull(l_suppkey#X)) AND isnotnull(l_orderkey#X)) -(132) Exchange +(133) Exchange Input [5]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(133) ShuffledHashJoin +(134) Sort +Input [5]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(135) SortMergeJoin Left keys [1]: [p_partkey#X] Right keys [1]: [l_partkey#X] Join type: Inner Join condition: None -(134) Project +(136) ShuffledHashJoinExecTemp +Input [6]: [p_partkey#X, l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: BuildLeft + +(137) Project Output [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Input [6]: [p_partkey#X, l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] -(135) Exchange +(138) Exchange Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(136) Scan parquet +(139) Sort +Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_suppkey#X ASC NULLS FIRST], false, 0 + +(140) Scan parquet Output [2]: [s_suppkey#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct -(137) Filter +(141) Filter Input [2]: [s_suppkey#X, s_nationkey#X] Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) -(138) Exchange +(142) Exchange Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(139) ShuffledHashJoin +(143) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(144) SortMergeJoin Left keys [1]: [l_suppkey#X] Right keys [1]: [s_suppkey#X] Join type: Inner Join condition: None -(140) Project +(145) ShuffledHashJoinExecTemp +Input [6]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, s_suppkey#X, s_nationkey#X] +Arguments: BuildRight + +(146) Project Output [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, s_nationkey#X] Input [6]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, s_suppkey#X, s_nationkey#X] -(141) Exchange +(147) Exchange Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, s_nationkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(142) Scan parquet +(148) Sort +Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, s_nationkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(149) Scan parquet Output [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderdate), GreaterThanOrEqual(o_orderdate,1995-01-01), LessThanOrEqual(o_orderdate,1996-12-31), IsNotNull(o_orderkey), IsNotNull(o_custkey)] ReadSchema: struct -(143) Filter +(150) Filter Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Condition : ((((isnotnull(o_orderdate#X) AND (o_orderdate#X >= 1995-01-01)) AND (o_orderdate#X <= 1996-12-31)) AND isnotnull(o_orderkey#X)) AND isnotnull(o_custkey#X)) -(144) Exchange +(151) Exchange Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(145) ShuffledHashJoin +(152) Sort +Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(153) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [o_orderkey#X] Join type: Inner Join condition: None -(146) Project +(154) ShuffledHashJoinExecTemp +Input [7]: [l_orderkey#X, l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderkey#X, o_custkey#X, o_orderdate#X] +Arguments: BuildRight + +(155) Project Output [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_custkey#X, o_orderdate#X] Input [7]: [l_orderkey#X, l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderkey#X, o_custkey#X, o_orderdate#X] -(147) Exchange +(156) Exchange Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_custkey#X, o_orderdate#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(148) Scan parquet +(157) Sort +Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_custkey#X, o_orderdate#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(158) Scan parquet Output [2]: [c_custkey#X, c_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(c_custkey), IsNotNull(c_nationkey)] ReadSchema: struct -(149) Filter +(159) Filter Input [2]: [c_custkey#X, c_nationkey#X] Condition : (isnotnull(c_custkey#X) AND isnotnull(c_nationkey#X)) -(150) Exchange +(160) Exchange Input [2]: [c_custkey#X, c_nationkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(151) ShuffledHashJoin +(161) Sort +Input [2]: [c_custkey#X, c_nationkey#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(162) SortMergeJoin Left keys [1]: [o_custkey#X] Right keys [1]: [c_custkey#X] Join type: Inner Join condition: None -(152) Project +(163) ShuffledHashJoinExecTemp +Input [7]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_custkey#X, o_orderdate#X, c_custkey#X, c_nationkey#X] +Arguments: BuildRight + +(164) Project Output [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, c_nationkey#X] Input [7]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_custkey#X, o_orderdate#X, c_custkey#X, c_nationkey#X] -(153) Exchange +(165) Exchange Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, c_nationkey#X] Arguments: hashpartitioning(c_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(154) Scan parquet +(166) Sort +Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, c_nationkey#X] +Arguments: [c_nationkey#X ASC NULLS FIRST], false, 0 + +(167) Scan parquet Output [2]: [n_nationkey#X, n_regionkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey), IsNotNull(n_regionkey)] ReadSchema: struct -(155) Filter +(168) Filter Input [2]: [n_nationkey#X, n_regionkey#X] Condition : (isnotnull(n_nationkey#X) AND isnotnull(n_regionkey#X)) -(156) Exchange +(169) Exchange Input [2]: [n_nationkey#X, n_regionkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(157) ShuffledHashJoin +(170) Sort +Input [2]: [n_nationkey#X, n_regionkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(171) SortMergeJoin Left keys [1]: [c_nationkey#X] Right keys [1]: [n_nationkey#X] Join type: Inner Join condition: None -(158) Project +(172) ShuffledHashJoinExecTemp +Input [7]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, c_nationkey#X, n_nationkey#X, n_regionkey#X] +Arguments: BuildRight + +(173) Project Output [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, n_regionkey#X] Input [7]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, c_nationkey#X, n_nationkey#X, n_regionkey#X] -(159) Exchange +(174) Exchange Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, n_regionkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(160) Scan parquet +(175) Sort +Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, n_regionkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(176) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey)] ReadSchema: struct -(161) Filter +(177) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : isnotnull(n_nationkey#X) -(162) Exchange +(178) Exchange Input [2]: [n_nationkey#X, n_name#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(163) ShuffledHashJoin +(179) Sort +Input [2]: [n_nationkey#X, n_name#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(180) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join type: Inner Join condition: None -(164) Project +(181) ShuffledHashJoinExecTemp +Input [7]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, n_regionkey#X, n_nationkey#X, n_name#X] +Arguments: BuildRight + +(182) Project Output [5]: [l_extendedprice#X, l_discount#X, o_orderdate#X, n_regionkey#X, n_name#X] Input [7]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, n_regionkey#X, n_nationkey#X, n_name#X] -(165) Exchange +(183) Exchange Input [5]: [l_extendedprice#X, l_discount#X, o_orderdate#X, n_regionkey#X, n_name#X] Arguments: hashpartitioning(n_regionkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(166) Scan parquet +(184) Sort +Input [5]: [l_extendedprice#X, l_discount#X, o_orderdate#X, n_regionkey#X, n_name#X] +Arguments: [n_regionkey#X ASC NULLS FIRST], false, 0 + +(185) Scan parquet Output [2]: [r_regionkey#X, r_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(r_name), EqualTo(r_name,AMERICA), IsNotNull(r_regionkey)] ReadSchema: struct -(167) Filter +(186) Filter Input [2]: [r_regionkey#X, r_name#X] Condition : ((isnotnull(r_name#X) AND (r_name#X = AMERICA)) AND isnotnull(r_regionkey#X)) -(168) Project +(187) Project Output [1]: [r_regionkey#X] Input [2]: [r_regionkey#X, r_name#X] -(169) Exchange +(188) Exchange Input [1]: [r_regionkey#X] Arguments: hashpartitioning(r_regionkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(170) ShuffledHashJoin +(189) Sort +Input [1]: [r_regionkey#X] +Arguments: [r_regionkey#X ASC NULLS FIRST], false, 0 + +(190) SortMergeJoin Left keys [1]: [n_regionkey#X] Right keys [1]: [r_regionkey#X] Join type: Inner Join condition: None -(171) Project +(191) ShuffledHashJoinExecTemp +Input [6]: [l_extendedprice#X, l_discount#X, o_orderdate#X, n_regionkey#X, n_name#X, r_regionkey#X] +Arguments: BuildRight + +(192) Project Output [3]: [year(o_orderdate#X) AS o_year#X, (l_extendedprice#X * (1 - l_discount#X)) AS volume#X, n_name#X AS nation#X] Input [6]: [l_extendedprice#X, l_discount#X, o_orderdate#X, n_regionkey#X, n_name#X, r_regionkey#X] -(172) HashAggregate +(193) HashAggregate Input [3]: [o_year#X, volume#X, nation#X] Keys [1]: [o_year#X] Functions [2]: [partial_sum(CASE WHEN (nation#X = BRAZIL) THEN volume#X ELSE 0.0000 END), partial_sum(volume#X)] Aggregate Attributes [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] Results [5]: [o_year#X, sum#X, isEmpty#X, sum#X, isEmpty#X] -(173) Exchange +(194) Exchange Input [5]: [o_year#X, sum#X, isEmpty#X, sum#X, isEmpty#X] Arguments: hashpartitioning(o_year#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(174) HashAggregate +(195) HashAggregate Input [5]: [o_year#X, sum#X, isEmpty#X, sum#X, isEmpty#X] Keys [1]: [o_year#X] Functions [2]: [sum(CASE WHEN (nation#X = BRAZIL) THEN volume#X ELSE 0.0000 END), sum(volume#X)] Aggregate Attributes [2]: [sum(CASE WHEN (nation#X = BRAZIL) THEN volume#X ELSE 0.0000 END)#X, sum(volume#X)#X] Results [2]: [o_year#X, (sum(CASE WHEN (nation#X = BRAZIL) THEN volume#X ELSE 0.0000 END)#X / sum(volume#X)#X) AS mkt_share#X] -(175) Exchange +(196) Exchange Input [2]: [o_year#X, mkt_share#X] Arguments: rangepartitioning(o_year#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(176) Sort +(197) Sort Input [2]: [o_year#X, mkt_share#X] Arguments: [o_year#X ASC NULLS FIRST], true, 0 -(177) AdaptiveSparkPlan +(198) AdaptiveSparkPlan Output [2]: [o_year#X, mkt_share#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/9.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/9.txt index 9a8c9a87aac2..c3830c7f11b6 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/9.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/9.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (133) +AdaptiveSparkPlan (148) +- == Final Plan == VeloxColumnarToRowExec (94) +- ^ SortExecTransformer (92) @@ -71,44 +71,59 @@ AdaptiveSparkPlan (133) +- ^ FilterExecTransformer (70) +- ^ Scan parquet (69) +- == Initial Plan == - Sort (132) - +- Exchange (131) - +- HashAggregate (130) - +- Exchange (129) - +- HashAggregate (128) - +- Project (127) - +- ShuffledHashJoin Inner BuildRight (126) - :- Exchange (122) - : +- Project (121) - : +- ShuffledHashJoin Inner BuildRight (120) - : :- Exchange (116) - : : +- Project (115) - : : +- ShuffledHashJoin Inner BuildRight (114) - : : :- Exchange (110) - : : : +- Project (109) - : : : +- ShuffledHashJoin Inner BuildRight (108) - : : : :- Exchange (104) - : : : : +- Project (103) - : : : : +- ShuffledHashJoin Inner BuildLeft (102) - : : : : :- Exchange (98) - : : : : : +- Project (97) - : : : : : +- Filter (96) - : : : : : +- Scan parquet (95) - : : : : +- Exchange (101) - : : : : +- Filter (100) - : : : : +- Scan parquet (99) - : : : +- Exchange (107) - : : : +- Filter (106) - : : : +- Scan parquet (105) - : : +- Exchange (113) - : : +- Filter (112) - : : +- Scan parquet (111) - : +- Exchange (119) - : +- Filter (118) - : +- Scan parquet (117) - +- Exchange (125) - +- Filter (124) - +- Scan parquet (123) + Sort (147) + +- Exchange (146) + +- HashAggregate (145) + +- Exchange (144) + +- HashAggregate (143) + +- Project (142) + +- ShuffledHashJoinExecTemp (141) + +- SortMergeJoin Inner (140) + :- Sort (135) + : +- Exchange (134) + : +- Project (133) + : +- ShuffledHashJoinExecTemp (132) + : +- SortMergeJoin Inner (131) + : :- Sort (126) + : : +- Exchange (125) + : : +- Project (124) + : : +- ShuffledHashJoinExecTemp (123) + : : +- SortMergeJoin Inner (122) + : : :- Sort (117) + : : : +- Exchange (116) + : : : +- Project (115) + : : : +- ShuffledHashJoinExecTemp (114) + : : : +- SortMergeJoin Inner (113) + : : : :- Sort (108) + : : : : +- Exchange (107) + : : : : +- Project (106) + : : : : +- ShuffledHashJoinExecTemp (105) + : : : : +- SortMergeJoin Inner (104) + : : : : :- Sort (99) + : : : : : +- Exchange (98) + : : : : : +- Project (97) + : : : : : +- Filter (96) + : : : : : +- Scan parquet (95) + : : : : +- Sort (103) + : : : : +- Exchange (102) + : : : : +- Filter (101) + : : : : +- Scan parquet (100) + : : : +- Sort (112) + : : : +- Exchange (111) + : : : +- Filter (110) + : : : +- Scan parquet (109) + : : +- Sort (121) + : : +- Exchange (120) + : : +- Filter (119) + : : +- Scan parquet (118) + : +- Sort (130) + : +- Exchange (129) + : +- Filter (128) + : +- Scan parquet (127) + +- Sort (139) + +- Exchange (138) + +- Filter (137) + +- Scan parquet (136) (1) Scan parquet @@ -515,173 +530,233 @@ Input [2]: [p_partkey#X, p_name#X] Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(99) Scan parquet +(99) Sort +Input [1]: [p_partkey#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(100) Scan parquet Output [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_partkey), IsNotNull(l_suppkey), IsNotNull(l_orderkey)] ReadSchema: struct -(100) Filter +(101) Filter Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Condition : ((isnotnull(l_partkey#X) AND isnotnull(l_suppkey#X)) AND isnotnull(l_orderkey#X)) -(101) Exchange +(102) Exchange Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(102) ShuffledHashJoin +(103) Sort +Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(104) SortMergeJoin Left keys [1]: [p_partkey#X] Right keys [1]: [l_partkey#X] Join type: Inner Join condition: None -(103) Project +(105) ShuffledHashJoinExecTemp +Input [7]: [p_partkey#X, l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] +Arguments: BuildLeft + +(106) Project Output [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Input [7]: [p_partkey#X, l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] -(104) Exchange +(107) Exchange Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(105) Scan parquet +(108) Sort +Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] +Arguments: [l_suppkey#X ASC NULLS FIRST], false, 0 + +(109) Scan parquet Output [2]: [s_suppkey#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct -(106) Filter +(110) Filter Input [2]: [s_suppkey#X, s_nationkey#X] Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) -(107) Exchange +(111) Exchange Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(108) ShuffledHashJoin +(112) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(113) SortMergeJoin Left keys [1]: [l_suppkey#X] Right keys [1]: [s_suppkey#X] Join type: Inner Join condition: None -(109) Project +(114) ShuffledHashJoinExecTemp +Input [8]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_suppkey#X, s_nationkey#X] +Arguments: BuildRight + +(115) Project Output [7]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X] Input [8]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_suppkey#X, s_nationkey#X] -(110) Exchange +(116) Exchange Input [7]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X] Arguments: hashpartitioning(l_suppkey#X, l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(111) Scan parquet +(117) Sort +Input [7]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X] +Arguments: [l_suppkey#X ASC NULLS FIRST, l_partkey#X ASC NULLS FIRST], false, 0 + +(118) Scan parquet Output [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(ps_suppkey), IsNotNull(ps_partkey)] ReadSchema: struct -(112) Filter +(119) Filter Input [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] Condition : (isnotnull(ps_suppkey#X) AND isnotnull(ps_partkey#X)) -(113) Exchange +(120) Exchange Input [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] Arguments: hashpartitioning(ps_suppkey#X, ps_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(114) ShuffledHashJoin +(121) Sort +Input [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] +Arguments: [ps_suppkey#X ASC NULLS FIRST, ps_partkey#X ASC NULLS FIRST], false, 0 + +(122) SortMergeJoin Left keys [2]: [l_suppkey#X, l_partkey#X] Right keys [2]: [ps_suppkey#X, ps_partkey#X] Join type: Inner Join condition: None -(115) Project +(123) ShuffledHashJoinExecTemp +Input [10]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_partkey#X, ps_suppkey#X, ps_supplycost#X] +Arguments: BuildRight + +(124) Project Output [6]: [l_orderkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X] Input [10]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_partkey#X, ps_suppkey#X, ps_supplycost#X] -(116) Exchange +(125) Exchange Input [6]: [l_orderkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(117) Scan parquet +(126) Sort +Input [6]: [l_orderkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(127) Scan parquet Output [2]: [o_orderkey#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderkey)] ReadSchema: struct -(118) Filter +(128) Filter Input [2]: [o_orderkey#X, o_orderdate#X] Condition : isnotnull(o_orderkey#X) -(119) Exchange +(129) Exchange Input [2]: [o_orderkey#X, o_orderdate#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(120) ShuffledHashJoin +(130) Sort +Input [2]: [o_orderkey#X, o_orderdate#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(131) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [o_orderkey#X] Join type: Inner Join condition: None -(121) Project +(132) ShuffledHashJoinExecTemp +Input [8]: [l_orderkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderkey#X, o_orderdate#X] +Arguments: BuildRight + +(133) Project Output [6]: [l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderdate#X] Input [8]: [l_orderkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderkey#X, o_orderdate#X] -(122) Exchange +(134) Exchange Input [6]: [l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderdate#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(123) Scan parquet +(135) Sort +Input [6]: [l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderdate#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(136) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey)] ReadSchema: struct -(124) Filter +(137) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : isnotnull(n_nationkey#X) -(125) Exchange +(138) Exchange Input [2]: [n_nationkey#X, n_name#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(126) ShuffledHashJoin +(139) Sort +Input [2]: [n_nationkey#X, n_name#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(140) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join type: Inner Join condition: None -(127) Project +(141) ShuffledHashJoinExecTemp +Input [8]: [l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderdate#X, n_nationkey#X, n_name#X] +Arguments: BuildRight + +(142) Project Output [3]: [n_name#X AS nation#X, year(o_orderdate#X) AS o_year#X, ((l_extendedprice#X * (1 - l_discount#X)) - (ps_supplycost#X * l_quantity#X)) AS amount#X] Input [8]: [l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderdate#X, n_nationkey#X, n_name#X] -(128) HashAggregate +(143) HashAggregate Input [3]: [nation#X, o_year#X, amount#X] Keys [2]: [nation#X, o_year#X] Functions [1]: [partial_sum(amount#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [4]: [nation#X, o_year#X, sum#X, isEmpty#X] -(129) Exchange +(144) Exchange Input [4]: [nation#X, o_year#X, sum#X, isEmpty#X] Arguments: hashpartitioning(nation#X, o_year#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(130) HashAggregate +(145) HashAggregate Input [4]: [nation#X, o_year#X, sum#X, isEmpty#X] Keys [2]: [nation#X, o_year#X] Functions [1]: [sum(amount#X)] Aggregate Attributes [1]: [sum(amount#X)#X] Results [3]: [nation#X, o_year#X, sum(amount#X)#X AS sum_profit#X] -(131) Exchange +(146) Exchange Input [3]: [nation#X, o_year#X, sum_profit#X] Arguments: rangepartitioning(nation#X ASC NULLS FIRST, o_year#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(132) Sort +(147) Sort Input [3]: [nation#X, o_year#X, sum_profit#X] Arguments: [nation#X ASC NULLS FIRST, o_year#X DESC NULLS LAST], true, 0 -(133) AdaptiveSparkPlan +(148) AdaptiveSparkPlan Output [3]: [nation#X, o_year#X, sum_profit#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/gluten-core/src/main/scala/org/apache/gluten/execution/JoinExecTransformer.scala b/gluten-core/src/main/scala/org/apache/gluten/execution/JoinExecTransformer.scala index cd22c578594c..c42b109bd732 100644 --- a/gluten-core/src/main/scala/org/apache/gluten/execution/JoinExecTransformer.scala +++ b/gluten-core/src/main/scala/org/apache/gluten/execution/JoinExecTransformer.scala @@ -27,12 +27,13 @@ import org.apache.gluten.substrait.expression.{ExpressionBuilder, ExpressionNode import org.apache.gluten.utils.SubstraitUtil import org.apache.spark.rdd.RDD +import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.optimizer.{BuildLeft, BuildRight, BuildSide} import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.physical._ -import org.apache.spark.sql.execution.{ExpandOutputPartitioningShim, ExplainUtils, SparkPlan} -import org.apache.spark.sql.execution.joins.{BaseJoinExec, HashedRelationBroadcastMode, HashJoin} +import org.apache.spark.sql.execution.{ExpandOutputPartitioningShim, ExplainUtils, SparkPlan, UnaryExecNode} +import org.apache.spark.sql.execution.joins.{BaseJoinExec, HashedRelationBroadcastMode, HashJoin, ShuffledJoin} import org.apache.spark.sql.execution.metric.SQLMetric import org.apache.spark.sql.types._ import org.apache.spark.sql.vectorized.ColumnarBatch @@ -420,3 +421,12 @@ abstract class BroadcastHashJoinExecTransformerBase( (1, if (isNullAwareAntiJoin) 1 else 0, buildHashTableId) } } + +case class ShuffledHashJoinExecTemp(child: ShuffledJoin, buildSide: BuildSide) + extends UnaryExecNode { + override protected def doExecute(): RDD[InternalRow] = throw new UnsupportedOperationException() + override def output: Seq[Attribute] = child.output + override protected def withNewChildInternal(newChild: SparkPlan): ShuffledHashJoinExecTemp = + copy(child = newChild.asInstanceOf[ShuffledJoin]) + override def outputPartitioning: Partitioning = child.outputPartitioning +} diff --git a/gluten-core/src/main/scala/org/apache/gluten/extension/StrategyOverrides.scala b/gluten-core/src/main/scala/org/apache/gluten/extension/StrategyOverrides.scala index f2f786259393..d373f65fe92e 100644 --- a/gluten-core/src/main/scala/org/apache/gluten/extension/StrategyOverrides.scala +++ b/gluten-core/src/main/scala/org/apache/gluten/extension/StrategyOverrides.scala @@ -18,19 +18,19 @@ package org.apache.gluten.extension import org.apache.gluten.{GlutenConfig, GlutenSparkExtensionsInjector} import org.apache.gluten.backendsapi.BackendsApiManager +import org.apache.gluten.execution.ShuffledHashJoinExecTemp import org.apache.gluten.extension.columnar.TRANSFORM_UNSUPPORTED import org.apache.gluten.extension.columnar.TransformHints.TAG import org.apache.gluten.utils.LogicalPlanSelector import org.apache.spark.sql.{SparkSession, SparkSessionExtensions, Strategy} import org.apache.spark.sql.catalyst.SQLConfHelper -import org.apache.spark.sql.catalyst.expressions.Expression import org.apache.spark.sql.catalyst.optimizer.{BuildLeft, BuildRight, JoinSelectionHelper} import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical._ -import org.apache.spark.sql.execution.{joins, JoinSelectionShim, SparkPlan} +import org.apache.spark.sql.execution.{JoinSelectionShim, SparkPlan} import org.apache.spark.sql.execution.adaptive.{BroadcastQueryStageExec, LogicalQueryStage} -import org.apache.spark.sql.execution.joins.BroadcastHashJoinExec +import org.apache.spark.sql.execution.joins.{BroadcastHashJoinExec, ShuffledHashJoinExec, ShuffledJoin, SortMergeJoinExec} object StrategyOverrides extends GlutenSparkExtensionsInjector { override def inject(extensions: SparkSessionExtensions): Unit = { @@ -38,6 +38,8 @@ object StrategyOverrides extends GlutenSparkExtensionsInjector { } } +// Use the smaller table to build hashmap in shuffled hash join. BuildSide needs to be generated +// in PlannerStrategy, otherwise larger table may be used. case class JoinSelectionOverrides(session: SparkSession) extends Strategy with JoinSelectionHelper @@ -48,107 +50,6 @@ case class JoinSelectionOverrides(session: SparkSession) case _ => false } - def extractEqualJoinKeyCondition( - joinType: JoinType, - leftKeys: Seq[Expression], - rightKeys: Seq[Expression], - condition: Option[Expression], - left: LogicalPlan, - right: LogicalPlan, - hint: JoinHint, - forceShuffledHashJoin: Boolean): Seq[SparkPlan] = { - if (isBroadcastStage(left) || isBroadcastStage(right)) { - val buildSide = if (isBroadcastStage(left)) BuildLeft else BuildRight - Seq( - BroadcastHashJoinExec( - leftKeys, - rightKeys, - joinType, - buildSide, - condition, - planLater(left), - planLater(right))) - } else { - // Generate BHJ here, avoid to do match in `JoinSelection` again. - val isHintEmpty = hint.leftHint.isEmpty && hint.rightHint.isEmpty - val buildSide = getBroadcastBuildSide(left, right, joinType, hint, !isHintEmpty, conf) - if (buildSide.isDefined) { - return Seq( - joins.BroadcastHashJoinExec( - leftKeys, - rightKeys, - joinType, - buildSide.get, - condition, - planLater(left), - planLater(right))) - } - - if ( - forceShuffledHashJoin && - !BackendsApiManager.getSparkPlanExecApiInstance.joinFallback( - joinType, - left.outputSet, - right.outputSet, - condition) && - !left.getTagValue(TAG).isDefined && - !right.getTagValue(TAG).isDefined - ) { - // Force use of ShuffledHashJoin in preference to SortMergeJoin. With no respect to - // conf setting "spark.sql.join.preferSortMergeJoin". - val (leftBuildable, rightBuildable) = - if (BackendsApiManager.getSettings.utilizeShuffledHashJoinHint()) { - // Currently, ClickHouse backend can not support AQE, so it needs to use join hint - // to decide the build side, after supporting AQE, will remove this. - val leftHintEnabled = hintToShuffleHashJoinLeft(hint) - val rightHintEnabled = hintToShuffleHashJoinRight(hint) - val leftHintMergeEnabled = hint.leftHint.exists(_.strategy.contains(SHUFFLE_MERGE)) - val rightHintMergeEnabled = hint.rightHint.exists(_.strategy.contains(SHUFFLE_MERGE)) - if (leftHintEnabled || rightHintEnabled) { - (leftHintEnabled, rightHintEnabled) - } else if (leftHintMergeEnabled || rightHintMergeEnabled) { - // hack: when set SHUFFLE_MERGE hint, it means that - // it don't use this side as the build side - (!leftHintMergeEnabled, !rightHintMergeEnabled) - } else { - ( - BackendsApiManager.getSettings.supportHashBuildJoinTypeOnLeft(joinType), - BackendsApiManager.getSettings.supportHashBuildJoinTypeOnRight(joinType)) - } - } else { - (canBuildShuffledHashJoinLeft(joinType), canBuildShuffledHashJoinRight(joinType)) - } - - if (!leftBuildable && !rightBuildable) { - return Nil - } - val buildSide = if (!leftBuildable) { - BuildRight - } else if (!rightBuildable) { - BuildLeft - } else { - getSmallerSide(left, right) - } - - return Option(buildSide) - .map { - buildSide => - Seq( - joins.ShuffledHashJoinExec( - leftKeys, - rightKeys, - joinType, - buildSide, - condition, - planLater(left), - planLater(right))) - } - .getOrElse(Nil) - } - Nil - } - } - def existsMultiJoins(plan: LogicalPlan, count: Int = 0): Boolean = { plan match { case plan: Join => @@ -157,7 +58,7 @@ case class JoinSelectionOverrides(session: SparkSession) case plan: Project => if ((count + 1) >= GlutenConfig.getConf.logicalJoinOptimizationThrottle) return true plan.children.exists(existsMultiJoins(_, count + 1)) - case other => false + case _ => false } } @@ -166,6 +67,11 @@ case class JoinSelectionOverrides(session: SparkSession) plan } + def tagNotTransformable(plan: ShuffledJoin, reason: String): ShuffledJoin = { + plan.setTagValue(TAG, TRANSFORM_UNSUPPORTED(Some(reason))) + plan + } + def tagNotTransformableRecursive(plan: LogicalPlan, reason: String): LogicalPlan = { tagNotTransformable( plan.withNewChildren(plan.children.map(tagNotTransformableRecursive(_, reason))), @@ -179,6 +85,26 @@ case class JoinSelectionOverrides(session: SparkSession) }.size > 0 } + def genShuffledHashJoinExecTemp( + joinType: JoinType, + left: LogicalPlan, + right: LogicalPlan, + join: ShuffledJoin): ShuffledHashJoinExecTemp = { + val leftBuildable = BackendsApiManager.getSettings + .supportHashBuildJoinTypeOnLeft(joinType) + val rightBuildable = BackendsApiManager.getSettings + .supportHashBuildJoinTypeOnRight(joinType) + val buildSide = if (!leftBuildable) { + BuildRight + } else if (!rightBuildable) { + BuildLeft + } else { + getSmallerSide(left, right) + } + val child = tagNotTransformable(join, "child of ShuffledHashJoinExecTemp") + ShuffledHashJoinExecTemp(child, buildSide) + } + override def apply(plan: LogicalPlan): Seq[SparkPlan] = LogicalPlanSelector.maybeNil(session, plan) { // Ignore forceShuffledHashJoin if exist multi continuous joins @@ -189,24 +115,65 @@ case class JoinSelectionOverrides(session: SparkSession) tagNotTransformableRecursive(plan, "exist multi continuous joins") } plan match { - // If the build side of BHJ is already decided by AQE, we need to keep the build side. - case JoinSelectionShim.ExtractEquiJoinKeysShim( + case j @ JoinSelectionShim.ExtractEquiJoinKeysShim( joinType, leftKeys, rightKeys, condition, left, right, - hint) => - extractEqualJoinKeyCondition( - joinType, - leftKeys, - rightKeys, - condition, - left, - right, - hint, - GlutenConfig.getConf.forceShuffledHashJoin) + _) + if !isBroadcastStage(left) && !isBroadcastStage(right) && + !BackendsApiManager.getSparkPlanExecApiInstance.joinFallback( + joinType, + left.outputSet, + right.outputSet, + condition) => + val originalJoinExec = session.sessionState.planner.JoinSelection.apply(j) + // TODO: ShuffledHashJoinExecTemp adapts to RAS + if ( + GlutenConfig.getConf.enableRas && GlutenConfig.getConf.forceShuffledHashJoin && + !left.getTagValue(TAG).isDefined && !right.getTagValue(TAG).isDefined + ) { + originalJoinExec(0) match { + case _: BroadcastHashJoinExec => originalJoinExec + case _: ShuffledHashJoinExec => originalJoinExec + case _ => + val leftBuildable = canBuildShuffledHashJoinLeft(joinType) + val rightBuildable = canBuildShuffledHashJoinRight(joinType) + val buildSide = if (!leftBuildable) { + BuildRight + } else if (!rightBuildable) { + BuildLeft + } else { + getSmallerSide(left, right) + } + Option(buildSide) + .map { + buildSide => + Seq( + ShuffledHashJoinExec( + leftKeys, + rightKeys, + joinType, + buildSide, + condition, + planLater(left), + planLater(right))) + } + .getOrElse(Nil) + } + } else { + originalJoinExec(0) match { + case shj: ShuffledHashJoinExec => + Seq(genShuffledHashJoinExecTemp(joinType, left, right, shj)) + case smj: SortMergeJoinExec + if GlutenConfig.getConf.forceShuffledHashJoin && + !left.getTagValue(TAG).isDefined && !right.getTagValue(TAG).isDefined => + Seq(genShuffledHashJoinExecTemp(joinType, left, right, smj)) + case _ => originalJoinExec + } + } case _ => Nil } } diff --git a/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/OffloadSingleNode.scala b/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/OffloadSingleNode.scala index 6e4d37f633eb..98389491e165 100644 --- a/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/OffloadSingleNode.scala +++ b/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/OffloadSingleNode.scala @@ -119,69 +119,90 @@ case class OffloadExchange() extends OffloadSingleNode with LogLevelUtil { // Join transformation. case class OffloadJoin() extends OffloadSingleNode with LogLevelUtil { - override def offload(plan: SparkPlan): SparkPlan = { - if (TransformHints.isNotTransformable(plan)) { + private def dropPartialSort(plan: SparkPlan): SparkPlan = plan match { + case sort: SortExecTransformer if !sort.global => + sort.child + case sort: SortExec if !sort.global => + sort.child + case _ => plan + } + + override def offload(plan: SparkPlan): SparkPlan = plan match { + case ShuffledHashJoinExecTemp(join, _) if TransformHints.isNotTransformable(plan) => + logDebug(s"Columnar Processing for ${join.getClass} is under row guard.") + join + case p if TransformHints.isNotTransformable(p) => logDebug(s"Columnar Processing for ${plan.getClass} is under row guard.") - return plan - } - plan match { - case plan: ShuffledHashJoinExec => - val left = plan.left - val right = plan.right - logDebug(s"Columnar Processing for ${plan.getClass} is currently supported.") - BackendsApiManager.getSparkPlanExecApiInstance - .genShuffledHashJoinExecTransformer( - plan.leftKeys, - plan.rightKeys, - plan.joinType, - TransformHints.getShuffleHashJoinBuildSide(plan), - plan.condition, - left, - right, - plan.isSkewJoin) - case plan: SortMergeJoinExec => - val left = plan.left - val right = plan.right - logDebug(s"Columnar Processing for ${plan.getClass} is currently supported.") - BackendsApiManager.getSparkPlanExecApiInstance - .genSortMergeJoinExecTransformer( - plan.leftKeys, - plan.rightKeys, - plan.joinType, - plan.condition, - left, - right, - plan.isSkewJoin) - case plan: BroadcastHashJoinExec => - val left = plan.left - val right = plan.right - BackendsApiManager.getSparkPlanExecApiInstance - .genBroadcastHashJoinExecTransformer( - plan.leftKeys, - plan.rightKeys, - plan.joinType, - plan.buildSide, - plan.condition, - left, - right, - isNullAwareAntiJoin = plan.isNullAwareAntiJoin) - case plan: CartesianProductExec => - val left = plan.left - val right = plan.right - BackendsApiManager.getSparkPlanExecApiInstance - .genCartesianProductExecTransformer(left, right, plan.condition) - case plan: BroadcastNestedLoopJoinExec => - val left = plan.left - val right = plan.right - BackendsApiManager.getSparkPlanExecApiInstance - .genBroadcastNestedLoopJoinExecTransformer( - left, - right, - plan.buildSide, - plan.joinType, - plan.condition) - case other => other - } + p + case ShuffledHashJoinExecTemp(join, buildSide) => + logDebug(s"Columnar Processing for ${join.getClass} is currently supported.") + BackendsApiManager.getSparkPlanExecApiInstance + .genShuffledHashJoinExecTransformer( + join.leftKeys, + join.rightKeys, + join.joinType, + buildSide, + join.condition, + dropPartialSort(join.left), + dropPartialSort(join.right), + join.isSkewJoin) + // TODO: Remove after ShuffledHashJoinExecTemp adapts to RAS + case plan: ShuffledHashJoinExec => + val left = plan.left + val right = plan.right + logDebug(s"Columnar Processing for ${plan.getClass} is currently supported.") + BackendsApiManager.getSparkPlanExecApiInstance + .genShuffledHashJoinExecTransformer( + plan.leftKeys, + plan.rightKeys, + plan.joinType, + TransformHints.getShuffleHashJoinBuildSide(plan), + plan.condition, + left, + right, + plan.isSkewJoin) + case plan: SortMergeJoinExec => + val left = plan.left + val right = plan.right + logDebug(s"Columnar Processing for ${plan.getClass} is currently supported.") + BackendsApiManager.getSparkPlanExecApiInstance + .genSortMergeJoinExecTransformer( + plan.leftKeys, + plan.rightKeys, + plan.joinType, + plan.condition, + left, + right, + plan.isSkewJoin) + case plan: BroadcastHashJoinExec => + val left = plan.left + val right = plan.right + BackendsApiManager.getSparkPlanExecApiInstance + .genBroadcastHashJoinExecTransformer( + plan.leftKeys, + plan.rightKeys, + plan.joinType, + plan.buildSide, + plan.condition, + left, + right, + isNullAwareAntiJoin = plan.isNullAwareAntiJoin) + case plan: CartesianProductExec => + val left = plan.left + val right = plan.right + BackendsApiManager.getSparkPlanExecApiInstance + .genCartesianProductExecTransformer(left, right, plan.condition) + case plan: BroadcastNestedLoopJoinExec => + val left = plan.left + val right = plan.right + BackendsApiManager.getSparkPlanExecApiInstance + .genBroadcastNestedLoopJoinExecTransformer( + left, + right, + plan.buildSide, + plan.joinType, + plan.condition) + case other => other } } diff --git a/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/TransformHintRule.scala b/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/TransformHintRule.scala index ca35c74f6892..07a45733b22c 100644 --- a/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/TransformHintRule.scala +++ b/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/TransformHintRule.scala @@ -161,29 +161,25 @@ object TransformHints { } } + // TODO: Remove after ShuffledHashJoinExecTemp adapts to RAS def getShuffleHashJoinBuildSide(shj: ShuffledHashJoinExec): BuildSide = { - if (BackendsApiManager.getSettings.utilizeShuffledHashJoinHint()) { - shj.buildSide + val leftBuildable = BackendsApiManager.getSettings + .supportHashBuildJoinTypeOnLeft(shj.joinType) + val rightBuildable = BackendsApiManager.getSettings + .supportHashBuildJoinTypeOnRight(shj.joinType) + if (!leftBuildable) { + BuildRight + } else if (!rightBuildable) { + BuildLeft } else { - val leftBuildable = BackendsApiManager.getSettings - .supportHashBuildJoinTypeOnLeft(shj.joinType) - val rightBuildable = BackendsApiManager.getSettings - .supportHashBuildJoinTypeOnRight(shj.joinType) - - if (!leftBuildable) { - BuildRight - } else if (!rightBuildable) { - BuildLeft - } else { - shj.logicalLink match { - case Some(join: Join) => - val leftSize = join.left.stats.sizeInBytes - val rightSize = join.right.stats.sizeInBytes - if (rightSize <= leftSize) BuildRight else BuildLeft - // Only the ShuffledHashJoinExec generated directly in some spark tests is not link - // logical plan, such as OuterJoinSuite. - case _ => shj.buildSide - } + shj.logicalLink match { + case Some(join: Join) => + val leftSize = join.left.stats.sizeInBytes + val rightSize = join.right.stats.sizeInBytes + if (rightSize <= leftSize) BuildRight else BuildLeft + // Only the ShuffledHashJoinExec generated directly in some spark tests is not link + // logical plan, such as OuterJoinSuite. + case _ => shj.buildSide } } } @@ -415,6 +411,7 @@ case class AddTransformHintRule() extends Rule[SparkPlan] { case plan: ShuffleExchangeExec => val transformer = ColumnarShuffleExchangeExec(plan, plan.child, plan.child.output) transformer.doValidate().tagOnFallback(plan) + // TODO: Remove after ShuffledHashJoinExecTemp adapts to RAS case plan: ShuffledHashJoinExec => val transformer = BackendsApiManager.getSparkPlanExecApiInstance .genShuffledHashJoinExecTransformer( @@ -427,6 +424,18 @@ case class AddTransformHintRule() extends Rule[SparkPlan] { plan.right, plan.isSkewJoin) transformer.doValidate().tagOnFallback(plan) + case plan @ ShuffledHashJoinExecTemp(join, buildSide) => + val transformer = BackendsApiManager.getSparkPlanExecApiInstance + .genShuffledHashJoinExecTransformer( + join.leftKeys, + join.rightKeys, + join.joinType, + buildSide, + join.condition, + join.left, + join.right, + join.isSkewJoin) + transformer.doValidate().tagOnFallback(plan) case plan: BroadcastExchangeExec => val transformer = ColumnarBroadcastExchangeExec(plan.mode, plan.child) transformer.doValidate().tagOnFallback(plan) diff --git a/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/execution/joins/GlutenBroadcastJoinSuite.scala b/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/execution/joins/GlutenBroadcastJoinSuite.scala index c9ccc1afc75d..1f7901979c43 100644 --- a/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/execution/joins/GlutenBroadcastJoinSuite.scala +++ b/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/execution/joins/GlutenBroadcastJoinSuite.scala @@ -247,7 +247,7 @@ class GlutenBroadcastJoinSuite extends BroadcastJoinSuite with GlutenTestsCommon val plan = EnsureRequirements.apply(df5.queryExecution.sparkPlan) assert(plan.collect { case p: BroadcastHashJoinExec => p }.size === 1) - assert(plan.collect { case p: ShuffledHashJoinExec => p }.size === 1) + assert(plan.collect { case p: SortMergeJoinExec => p }.size === 1) } } diff --git a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenJoinSuite.scala b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenJoinSuite.scala index 4ac8bd3ea8bf..7ad9b5aa63b5 100644 --- a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenJoinSuite.scala +++ b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenJoinSuite.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql import org.apache.spark.sql.execution.SparkPlan -import org.apache.spark.sql.execution.joins.ShuffledHashJoinExec +import org.apache.spark.sql.execution.joins.SortMergeJoinExec class GlutenJoinSuite extends JoinSuite with GlutenSQLTestsTrait { @@ -58,9 +58,9 @@ class GlutenJoinSuite extends JoinSuite with GlutenSQLTestsTrait { testGluten( "SPARK-43113: Full outer join with duplicate stream-side" + - " references in condition (SHJ)") { + " references in condition (SMJ)") { def check(plan: SparkPlan): Unit = { - assert(collect(plan) { case _: ShuffledHashJoinExec => true }.size === 1) + assert(collect(plan) { case _: SortMergeJoinExec => true }.size === 1) } dupStreamSideColTest("MERGE", check) diff --git a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenJoinSuite.scala b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenJoinSuite.scala index 09718fb1a439..aa396bb78f6d 100644 --- a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenJoinSuite.scala +++ b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenJoinSuite.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql import org.apache.spark.sql.execution.SparkPlan -import org.apache.spark.sql.execution.joins.ShuffledHashJoinExec +import org.apache.spark.sql.execution.joins.SortMergeJoinExec class GlutenJoinSuite extends JoinSuite with GlutenSQLTestsTrait { @@ -60,9 +60,9 @@ class GlutenJoinSuite extends JoinSuite with GlutenSQLTestsTrait { testGluten( "SPARK-43113: Full outer join with duplicate stream-side" + - " references in condition (SHJ)") { + " references in condition (SMJ)") { def check(plan: SparkPlan): Unit = { - assert(collect(plan) { case _: ShuffledHashJoinExec => true }.size === 1) + assert(collect(plan) { case _: SortMergeJoinExec => true }.size === 1) } dupStreamSideColTest("MERGE", check)