diff --git a/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHBackend.scala b/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHBackend.scala index 1587b9ea3488..cdca1b031a91 100644 --- a/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHBackend.scala +++ b/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHBackend.scala @@ -189,7 +189,6 @@ object CHBackendSettings extends BackendSettingsApi with Logging { } } - override def utilizeShuffledHashJoinHint(): Boolean = true override def supportShuffleWithProject( outputPartitioning: Partitioning, child: SparkPlan): Boolean = { diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHNullableColumnarShuffleSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHNullableColumnarShuffleSuite.scala index 245b52d37109..c5f67f45d577 100644 --- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHNullableColumnarShuffleSuite.scala +++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHNullableColumnarShuffleSuite.scala @@ -17,7 +17,7 @@ package org.apache.gluten.execution import org.apache.spark.SparkConf -import org.apache.spark.sql.catalyst.optimizer.BuildLeft +import org.apache.spark.sql.catalyst.optimizer.{BuildLeft, BuildRight} class GlutenClickHouseTPCHNullableColumnarShuffleSuite extends GlutenClickHouseTPCHAbstractSuite { @@ -63,7 +63,11 @@ class GlutenClickHouseTPCHNullableColumnarShuffleSuite extends GlutenClickHouseT val shjBuildLeft = df.queryExecution.executedPlan.collect { case shj: ShuffledHashJoinExecTransformerBase if shj.joinBuildSide == BuildLeft => shj } - assert(shjBuildLeft.size == 2) + assert(shjBuildLeft.size == 1) + val shjBuildRight = df.queryExecution.executedPlan.collect { + case shj: ShuffledHashJoinExecTransformerBase if shj.joinBuildSide == BuildRight => shj + } + assert(shjBuildRight.size == 1) } } } diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHNullableSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHNullableSuite.scala index 0eb4de74209b..7f62c6993157 100644 --- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHNullableSuite.scala +++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHNullableSuite.scala @@ -20,7 +20,7 @@ import org.apache.gluten.GlutenConfig import org.apache.spark.SparkConf import org.apache.spark.sql.catalyst.expressions.Alias -import org.apache.spark.sql.catalyst.optimizer.BuildLeft +import org.apache.spark.sql.catalyst.optimizer.{BuildLeft, BuildRight} class GlutenClickHouseTPCHNullableSuite extends GlutenClickHouseTPCHAbstractSuite { @@ -66,7 +66,11 @@ class GlutenClickHouseTPCHNullableSuite extends GlutenClickHouseTPCHAbstractSuit val shjBuildLeft = df.queryExecution.executedPlan.collect { case shj: ShuffledHashJoinExecTransformerBase if shj.joinBuildSide == BuildLeft => shj } - assert(shjBuildLeft.size == 2) + assert(shjBuildLeft.size == 1) + val shjBuildRight = df.queryExecution.executedPlan.collect { + case shj: ShuffledHashJoinExecTransformerBase if shj.joinBuildSide == BuildRight => shj + } + assert(shjBuildRight.size == 1) } } } diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHSuite.scala index 96b2cb09b163..d26891ddb1ea 100644 --- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHSuite.scala +++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHSuite.scala @@ -18,7 +18,7 @@ package org.apache.gluten.execution import org.apache.spark.SparkConf import org.apache.spark.sql.{Row, TestUtils} -import org.apache.spark.sql.catalyst.optimizer.BuildLeft +import org.apache.spark.sql.catalyst.optimizer.{BuildLeft, BuildRight} import org.apache.spark.sql.types.{DecimalType, StructType} // Some sqls' line length exceeds 100 @@ -73,7 +73,11 @@ class GlutenClickHouseTPCHSuite extends GlutenClickHouseTPCHAbstractSuite { val shjBuildLeft = df.queryExecution.executedPlan.collect { case shj: ShuffledHashJoinExecTransformerBase if shj.joinBuildSide == BuildLeft => shj } - assert(shjBuildLeft.size == 2) + assert(shjBuildLeft.size == 1) + val shjBuildRight = df.queryExecution.executedPlan.collect { + case shj: ShuffledHashJoinExecTransformerBase if shj.joinBuildSide == BuildRight => shj + } + assert(shjBuildRight.size == 1) } } } diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj-ras/spark32/20.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj-ras/spark32/20.txt index 6ebe36be3494..a1f1bb51cb98 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj-ras/spark32/20.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj-ras/spark32/20.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (110) +AdaptiveSparkPlan (112) +- == Final Plan == VeloxColumnarToRowExec (73) +- ^ SortExecTransformer (71) @@ -59,42 +59,44 @@ AdaptiveSparkPlan (110) +- ^ NoopFilter (56) +- ^ Scan parquet (55) +- == Initial Plan == - Sort (109) - +- Exchange (108) - +- Project (107) - +- BroadcastHashJoin Inner BuildRight (106) - :- Project (101) - : +- ShuffledHashJoin LeftSemi BuildRight (100) - : :- Exchange (76) - : : +- Filter (75) - : : +- Scan parquet (74) - : +- Exchange (99) - : +- Project (98) - : +- BroadcastHashJoin Inner BuildLeft (97) - : :- BroadcastExchange (84) - : : +- BroadcastHashJoin LeftSemi BuildRight (83) - : : :- Filter (78) - : : : +- Scan parquet (77) - : : +- BroadcastExchange (82) - : : +- Project (81) - : : +- Filter (80) - : : +- Scan parquet (79) - : +- Filter (96) - : +- HashAggregate (95) - : +- Exchange (94) - : +- HashAggregate (93) - : +- BroadcastHashJoin LeftSemi BuildRight (92) - : :- Project (87) - : : +- Filter (86) - : : +- Scan parquet (85) - : +- BroadcastExchange (91) - : +- Project (90) - : +- Filter (89) - : +- Scan parquet (88) - +- BroadcastExchange (105) - +- Project (104) - +- Filter (103) - +- Scan parquet (102) + Sort (111) + +- Exchange (110) + +- Project (109) + +- BroadcastHashJoin Inner BuildRight (108) + :- Project (103) + : +- SortMergeJoin LeftSemi (102) + : :- Sort (77) + : : +- Exchange (76) + : : +- Filter (75) + : : +- Scan parquet (74) + : +- Sort (101) + : +- Exchange (100) + : +- Project (99) + : +- BroadcastHashJoin Inner BuildLeft (98) + : :- BroadcastExchange (85) + : : +- BroadcastHashJoin LeftSemi BuildRight (84) + : : :- Filter (79) + : : : +- Scan parquet (78) + : : +- BroadcastExchange (83) + : : +- Project (82) + : : +- Filter (81) + : : +- Scan parquet (80) + : +- Filter (97) + : +- HashAggregate (96) + : +- Exchange (95) + : +- HashAggregate (94) + : +- BroadcastHashJoin LeftSemi BuildRight (93) + : :- Project (88) + : : +- Filter (87) + : : +- Scan parquet (86) + : +- BroadcastExchange (92) + : +- Project (91) + : +- Filter (90) + : +- Scan parquet (89) + +- BroadcastExchange (107) + +- Project (106) + +- Filter (105) + +- Scan parquet (104) (1) Scan parquet @@ -412,164 +414,172 @@ Condition : isnotnull(s_nationkey#X) Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(77) Scan parquet +(77) Sort +Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(78) Scan parquet Output [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(ps_availqty), IsNotNull(ps_partkey), IsNotNull(ps_suppkey)] ReadSchema: struct -(78) Filter +(79) Filter Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Condition : ((isnotnull(ps_availqty#X) AND isnotnull(ps_partkey#X)) AND isnotnull(ps_suppkey#X)) -(79) Scan parquet +(80) Scan parquet Output [2]: [p_partkey#X, p_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_name), StringStartsWith(p_name,forest)] ReadSchema: struct -(80) Filter +(81) Filter Input [2]: [p_partkey#X, p_name#X] Condition : (isnotnull(p_name#X) AND StartsWith(p_name#X, forest)) -(81) Project +(82) Project Output [1]: [p_partkey#X] Input [2]: [p_partkey#X, p_name#X] -(82) BroadcastExchange +(83) BroadcastExchange Input [1]: [p_partkey#X] Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=X] -(83) BroadcastHashJoin +(84) BroadcastHashJoin Left keys [1]: [ps_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(84) BroadcastExchange +(85) BroadcastExchange Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false], input[1, bigint, false]),false), [plan_id=X] -(85) Scan parquet +(86) Scan parquet Output [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1994-01-01), LessThan(l_shipdate,1995-01-01), IsNotNull(l_partkey), IsNotNull(l_suppkey)] ReadSchema: struct -(86) Filter +(87) Filter Input [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] Condition : ((((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1994-01-01)) AND (l_shipdate#X < 1995-01-01)) AND isnotnull(l_partkey#X)) AND isnotnull(l_suppkey#X)) -(87) Project +(88) Project Output [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Input [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] -(88) Scan parquet +(89) Scan parquet Output [2]: [p_partkey#X, p_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_name), StringStartsWith(p_name,forest)] ReadSchema: struct -(89) Filter +(90) Filter Input [2]: [p_partkey#X, p_name#X] Condition : (isnotnull(p_name#X) AND StartsWith(p_name#X, forest)) -(90) Project +(91) Project Output [1]: [p_partkey#X] Input [2]: [p_partkey#X, p_name#X] -(91) BroadcastExchange +(92) BroadcastExchange Input [1]: [p_partkey#X] Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=X] -(92) BroadcastHashJoin +(93) BroadcastHashJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(93) HashAggregate +(94) HashAggregate Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Keys [2]: [l_partkey#X, l_suppkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] -(94) Exchange +(95) Exchange Input [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_partkey#X, l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(95) HashAggregate +(96) HashAggregate Input [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] Keys [2]: [l_partkey#X, l_suppkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [3]: [CheckOverflow((0.50 * promote_precision(sum(l_quantity#X)#X)), DecimalType(24,3), true) AS (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(96) Filter +(97) Filter Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Condition : isnotnull((0.5 * sum(l_quantity))#X) -(97) BroadcastHashJoin +(98) BroadcastHashJoin Left keys [2]: [ps_partkey#X, ps_suppkey#X] Right keys [2]: [l_partkey#X, l_suppkey#X] Join condition: (cast(cast(ps_availqty#X as decimal(10,0)) as decimal(24,3)) > (0.5 * sum(l_quantity))#X) -(98) Project +(99) Project Output [1]: [ps_suppkey#X] Input [6]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(99) Exchange +(100) Exchange Input [1]: [ps_suppkey#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(100) ShuffledHashJoin +(101) Sort +Input [1]: [ps_suppkey#X] +Arguments: [ps_suppkey#X ASC NULLS FIRST], false, 0 + +(102) SortMergeJoin Left keys [1]: [s_suppkey#X] Right keys [1]: [ps_suppkey#X] Join condition: None -(101) Project +(103) Project Output [3]: [s_name#X, s_address#X, s_nationkey#X] Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] -(102) Scan parquet +(104) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,CANADA), IsNotNull(n_nationkey)] ReadSchema: struct -(103) Filter +(105) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = CANADA)) AND isnotnull(n_nationkey#X)) -(104) Project +(106) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(105) BroadcastExchange +(107) BroadcastExchange Input [1]: [n_nationkey#X] Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=X] -(106) BroadcastHashJoin +(108) BroadcastHashJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(107) Project +(109) Project Output [2]: [s_name#X, s_address#X] Input [4]: [s_name#X, s_address#X, s_nationkey#X, n_nationkey#X] -(108) Exchange +(110) Exchange Input [2]: [s_name#X, s_address#X] Arguments: rangepartitioning(s_name#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(109) Sort +(111) Sort Input [2]: [s_name#X, s_address#X] Arguments: [s_name#X ASC NULLS FIRST], true, 0 -(110) AdaptiveSparkPlan +(112) AdaptiveSparkPlan Output [2]: [s_name#X, s_address#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj-ras/spark33/20.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj-ras/spark33/20.txt index 27e59afbb7fc..4a899ae239be 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj-ras/spark33/20.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj-ras/spark33/20.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (107) +AdaptiveSparkPlan (109) +- == Final Plan == VeloxColumnarToRowExec (70) +- AQEShuffleRead (69) @@ -58,42 +58,44 @@ AdaptiveSparkPlan (107) +- ^ NoopFilter (56) +- ^ Scan parquet (55) +- == Initial Plan == - Sort (106) - +- Exchange (105) - +- Project (104) - +- BroadcastHashJoin Inner BuildRight (103) - :- Project (98) - : +- ShuffledHashJoin LeftSemi BuildRight (97) - : :- Exchange (73) - : : +- Filter (72) - : : +- Scan parquet (71) - : +- Exchange (96) - : +- Project (95) - : +- BroadcastHashJoin Inner BuildLeft (94) - : :- BroadcastExchange (81) - : : +- BroadcastHashJoin LeftSemi BuildRight (80) - : : :- Filter (75) - : : : +- Scan parquet (74) - : : +- BroadcastExchange (79) - : : +- Project (78) - : : +- Filter (77) - : : +- Scan parquet (76) - : +- Filter (93) - : +- HashAggregate (92) - : +- Exchange (91) - : +- HashAggregate (90) - : +- BroadcastHashJoin LeftSemi BuildRight (89) - : :- Project (84) - : : +- Filter (83) - : : +- Scan parquet (82) - : +- BroadcastExchange (88) - : +- Project (87) - : +- Filter (86) - : +- Scan parquet (85) - +- BroadcastExchange (102) - +- Project (101) - +- Filter (100) - +- Scan parquet (99) + Sort (108) + +- Exchange (107) + +- Project (106) + +- BroadcastHashJoin Inner BuildRight (105) + :- Project (100) + : +- SortMergeJoin LeftSemi (99) + : :- Sort (74) + : : +- Exchange (73) + : : +- Filter (72) + : : +- Scan parquet (71) + : +- Sort (98) + : +- Exchange (97) + : +- Project (96) + : +- BroadcastHashJoin Inner BuildLeft (95) + : :- BroadcastExchange (82) + : : +- BroadcastHashJoin LeftSemi BuildRight (81) + : : :- Filter (76) + : : : +- Scan parquet (75) + : : +- BroadcastExchange (80) + : : +- Project (79) + : : +- Filter (78) + : : +- Scan parquet (77) + : +- Filter (94) + : +- HashAggregate (93) + : +- Exchange (92) + : +- HashAggregate (91) + : +- BroadcastHashJoin LeftSemi BuildRight (90) + : :- Project (85) + : : +- Filter (84) + : : +- Scan parquet (83) + : +- BroadcastExchange (89) + : +- Project (88) + : +- Filter (87) + : +- Scan parquet (86) + +- BroadcastExchange (104) + +- Project (103) + +- Filter (102) + +- Scan parquet (101) (1) Scan parquet @@ -401,164 +403,172 @@ Condition : isnotnull(s_nationkey#X) Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(74) Scan parquet +(74) Sort +Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(75) Scan parquet Output [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(ps_availqty), IsNotNull(ps_partkey), IsNotNull(ps_suppkey)] ReadSchema: struct -(75) Filter +(76) Filter Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Condition : ((isnotnull(ps_availqty#X) AND isnotnull(ps_partkey#X)) AND isnotnull(ps_suppkey#X)) -(76) Scan parquet +(77) Scan parquet Output [2]: [p_partkey#X, p_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_name), StringStartsWith(p_name,forest)] ReadSchema: struct -(77) Filter +(78) Filter Input [2]: [p_partkey#X, p_name#X] Condition : (isnotnull(p_name#X) AND StartsWith(p_name#X, forest)) -(78) Project +(79) Project Output [1]: [p_partkey#X] Input [2]: [p_partkey#X, p_name#X] -(79) BroadcastExchange +(80) BroadcastExchange Input [1]: [p_partkey#X] Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=X] -(80) BroadcastHashJoin +(81) BroadcastHashJoin Left keys [1]: [ps_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(81) BroadcastExchange +(82) BroadcastExchange Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false], input[1, bigint, false]),false), [plan_id=X] -(82) Scan parquet +(83) Scan parquet Output [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1994-01-01), LessThan(l_shipdate,1995-01-01), IsNotNull(l_partkey), IsNotNull(l_suppkey)] ReadSchema: struct -(83) Filter +(84) Filter Input [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] Condition : ((((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1994-01-01)) AND (l_shipdate#X < 1995-01-01)) AND isnotnull(l_partkey#X)) AND isnotnull(l_suppkey#X)) -(84) Project +(85) Project Output [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Input [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] -(85) Scan parquet +(86) Scan parquet Output [2]: [p_partkey#X, p_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_name), StringStartsWith(p_name,forest)] ReadSchema: struct -(86) Filter +(87) Filter Input [2]: [p_partkey#X, p_name#X] Condition : (isnotnull(p_name#X) AND StartsWith(p_name#X, forest)) -(87) Project +(88) Project Output [1]: [p_partkey#X] Input [2]: [p_partkey#X, p_name#X] -(88) BroadcastExchange +(89) BroadcastExchange Input [1]: [p_partkey#X] Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=X] -(89) BroadcastHashJoin +(90) BroadcastHashJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(90) HashAggregate +(91) HashAggregate Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Keys [2]: [l_partkey#X, l_suppkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] -(91) Exchange +(92) Exchange Input [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_partkey#X, l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(92) HashAggregate +(93) HashAggregate Input [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] Keys [2]: [l_partkey#X, l_suppkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [3]: [CheckOverflow((0.50 * promote_precision(sum(l_quantity#X)#X)), DecimalType(24,3)) AS (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(93) Filter +(94) Filter Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Condition : isnotnull((0.5 * sum(l_quantity))#X) -(94) BroadcastHashJoin +(95) BroadcastHashJoin Left keys [2]: [ps_partkey#X, ps_suppkey#X] Right keys [2]: [l_partkey#X, l_suppkey#X] Join condition: (cast(ps_availqty#X as decimal(24,3)) > (0.5 * sum(l_quantity))#X) -(95) Project +(96) Project Output [1]: [ps_suppkey#X] Input [6]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(96) Exchange +(97) Exchange Input [1]: [ps_suppkey#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(97) ShuffledHashJoin +(98) Sort +Input [1]: [ps_suppkey#X] +Arguments: [ps_suppkey#X ASC NULLS FIRST], false, 0 + +(99) SortMergeJoin Left keys [1]: [s_suppkey#X] Right keys [1]: [ps_suppkey#X] Join condition: None -(98) Project +(100) Project Output [3]: [s_name#X, s_address#X, s_nationkey#X] Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] -(99) Scan parquet +(101) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,CANADA), IsNotNull(n_nationkey)] ReadSchema: struct -(100) Filter +(102) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = CANADA)) AND isnotnull(n_nationkey#X)) -(101) Project +(103) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(102) BroadcastExchange +(104) BroadcastExchange Input [1]: [n_nationkey#X] Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=X] -(103) BroadcastHashJoin +(105) BroadcastHashJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(104) Project +(106) Project Output [2]: [s_name#X, s_address#X] Input [4]: [s_name#X, s_address#X, s_nationkey#X, n_nationkey#X] -(105) Exchange +(107) Exchange Input [2]: [s_name#X, s_address#X] Arguments: rangepartitioning(s_name#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(106) Sort +(108) Sort Input [2]: [s_name#X, s_address#X] Arguments: [s_name#X ASC NULLS FIRST], true, 0 -(107) AdaptiveSparkPlan +(109) AdaptiveSparkPlan Output [2]: [s_name#X, s_address#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark32/20.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark32/20.txt index 7b494469aacc..9e03d8319537 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark32/20.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark32/20.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (110) +AdaptiveSparkPlan (112) +- == Final Plan == VeloxColumnarToRowExec (73) +- ^ SortExecTransformer (71) @@ -59,42 +59,44 @@ AdaptiveSparkPlan (110) +- ^ FilterExecTransformer (56) +- ^ Scan parquet (55) +- == Initial Plan == - Sort (109) - +- Exchange (108) - +- Project (107) - +- BroadcastHashJoin Inner BuildRight (106) - :- Project (101) - : +- ShuffledHashJoin LeftSemi BuildRight (100) - : :- Exchange (76) - : : +- Filter (75) - : : +- Scan parquet (74) - : +- Exchange (99) - : +- Project (98) - : +- BroadcastHashJoin Inner BuildLeft (97) - : :- BroadcastExchange (84) - : : +- BroadcastHashJoin LeftSemi BuildRight (83) - : : :- Filter (78) - : : : +- Scan parquet (77) - : : +- BroadcastExchange (82) - : : +- Project (81) - : : +- Filter (80) - : : +- Scan parquet (79) - : +- Filter (96) - : +- HashAggregate (95) - : +- Exchange (94) - : +- HashAggregate (93) - : +- BroadcastHashJoin LeftSemi BuildRight (92) - : :- Project (87) - : : +- Filter (86) - : : +- Scan parquet (85) - : +- BroadcastExchange (91) - : +- Project (90) - : +- Filter (89) - : +- Scan parquet (88) - +- BroadcastExchange (105) - +- Project (104) - +- Filter (103) - +- Scan parquet (102) + Sort (111) + +- Exchange (110) + +- Project (109) + +- BroadcastHashJoin Inner BuildRight (108) + :- Project (103) + : +- SortMergeJoin LeftSemi (102) + : :- Sort (77) + : : +- Exchange (76) + : : +- Filter (75) + : : +- Scan parquet (74) + : +- Sort (101) + : +- Exchange (100) + : +- Project (99) + : +- BroadcastHashJoin Inner BuildLeft (98) + : :- BroadcastExchange (85) + : : +- BroadcastHashJoin LeftSemi BuildRight (84) + : : :- Filter (79) + : : : +- Scan parquet (78) + : : +- BroadcastExchange (83) + : : +- Project (82) + : : +- Filter (81) + : : +- Scan parquet (80) + : +- Filter (97) + : +- HashAggregate (96) + : +- Exchange (95) + : +- HashAggregate (94) + : +- BroadcastHashJoin LeftSemi BuildRight (93) + : :- Project (88) + : : +- Filter (87) + : : +- Scan parquet (86) + : +- BroadcastExchange (92) + : +- Project (91) + : +- Filter (90) + : +- Scan parquet (89) + +- BroadcastExchange (107) + +- Project (106) + +- Filter (105) + +- Scan parquet (104) (1) Scan parquet @@ -412,164 +414,172 @@ Condition : isnotnull(s_nationkey#X) Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(77) Scan parquet +(77) Sort +Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(78) Scan parquet Output [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(ps_availqty), IsNotNull(ps_partkey), IsNotNull(ps_suppkey)] ReadSchema: struct -(78) Filter +(79) Filter Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Condition : ((isnotnull(ps_availqty#X) AND isnotnull(ps_partkey#X)) AND isnotnull(ps_suppkey#X)) -(79) Scan parquet +(80) Scan parquet Output [2]: [p_partkey#X, p_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_name), StringStartsWith(p_name,forest)] ReadSchema: struct -(80) Filter +(81) Filter Input [2]: [p_partkey#X, p_name#X] Condition : (isnotnull(p_name#X) AND StartsWith(p_name#X, forest)) -(81) Project +(82) Project Output [1]: [p_partkey#X] Input [2]: [p_partkey#X, p_name#X] -(82) BroadcastExchange +(83) BroadcastExchange Input [1]: [p_partkey#X] Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=X] -(83) BroadcastHashJoin +(84) BroadcastHashJoin Left keys [1]: [ps_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(84) BroadcastExchange +(85) BroadcastExchange Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false], input[1, bigint, false]),false), [plan_id=X] -(85) Scan parquet +(86) Scan parquet Output [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1994-01-01), LessThan(l_shipdate,1995-01-01), IsNotNull(l_partkey), IsNotNull(l_suppkey)] ReadSchema: struct -(86) Filter +(87) Filter Input [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] Condition : ((((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1994-01-01)) AND (l_shipdate#X < 1995-01-01)) AND isnotnull(l_partkey#X)) AND isnotnull(l_suppkey#X)) -(87) Project +(88) Project Output [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Input [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] -(88) Scan parquet +(89) Scan parquet Output [2]: [p_partkey#X, p_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_name), StringStartsWith(p_name,forest)] ReadSchema: struct -(89) Filter +(90) Filter Input [2]: [p_partkey#X, p_name#X] Condition : (isnotnull(p_name#X) AND StartsWith(p_name#X, forest)) -(90) Project +(91) Project Output [1]: [p_partkey#X] Input [2]: [p_partkey#X, p_name#X] -(91) BroadcastExchange +(92) BroadcastExchange Input [1]: [p_partkey#X] Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=X] -(92) BroadcastHashJoin +(93) BroadcastHashJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(93) HashAggregate +(94) HashAggregate Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Keys [2]: [l_partkey#X, l_suppkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] -(94) Exchange +(95) Exchange Input [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_partkey#X, l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(95) HashAggregate +(96) HashAggregate Input [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] Keys [2]: [l_partkey#X, l_suppkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [3]: [CheckOverflow((0.50 * promote_precision(sum(l_quantity#X)#X)), DecimalType(24,3), true) AS (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(96) Filter +(97) Filter Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Condition : isnotnull((0.5 * sum(l_quantity))#X) -(97) BroadcastHashJoin +(98) BroadcastHashJoin Left keys [2]: [ps_partkey#X, ps_suppkey#X] Right keys [2]: [l_partkey#X, l_suppkey#X] Join condition: (cast(cast(ps_availqty#X as decimal(10,0)) as decimal(24,3)) > (0.5 * sum(l_quantity))#X) -(98) Project +(99) Project Output [1]: [ps_suppkey#X] Input [6]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(99) Exchange +(100) Exchange Input [1]: [ps_suppkey#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(100) ShuffledHashJoin +(101) Sort +Input [1]: [ps_suppkey#X] +Arguments: [ps_suppkey#X ASC NULLS FIRST], false, 0 + +(102) SortMergeJoin Left keys [1]: [s_suppkey#X] Right keys [1]: [ps_suppkey#X] Join condition: None -(101) Project +(103) Project Output [3]: [s_name#X, s_address#X, s_nationkey#X] Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] -(102) Scan parquet +(104) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,CANADA), IsNotNull(n_nationkey)] ReadSchema: struct -(103) Filter +(105) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = CANADA)) AND isnotnull(n_nationkey#X)) -(104) Project +(106) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(105) BroadcastExchange +(107) BroadcastExchange Input [1]: [n_nationkey#X] Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=X] -(106) BroadcastHashJoin +(108) BroadcastHashJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(107) Project +(109) Project Output [2]: [s_name#X, s_address#X] Input [4]: [s_name#X, s_address#X, s_nationkey#X, n_nationkey#X] -(108) Exchange +(110) Exchange Input [2]: [s_name#X, s_address#X] Arguments: rangepartitioning(s_name#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(109) Sort +(111) Sort Input [2]: [s_name#X, s_address#X] Arguments: [s_name#X ASC NULLS FIRST], true, 0 -(110) AdaptiveSparkPlan +(112) AdaptiveSparkPlan Output [2]: [s_name#X, s_address#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark33/20.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark33/20.txt index a0edead7013d..5cd3c9d35c2c 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark33/20.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-bhj/spark33/20.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (107) +AdaptiveSparkPlan (109) +- == Final Plan == VeloxColumnarToRowExec (70) +- AQEShuffleRead (69) @@ -58,42 +58,44 @@ AdaptiveSparkPlan (107) +- ^ FilterExecTransformer (56) +- ^ Scan parquet (55) +- == Initial Plan == - Sort (106) - +- Exchange (105) - +- Project (104) - +- BroadcastHashJoin Inner BuildRight (103) - :- Project (98) - : +- ShuffledHashJoin LeftSemi BuildRight (97) - : :- Exchange (73) - : : +- Filter (72) - : : +- Scan parquet (71) - : +- Exchange (96) - : +- Project (95) - : +- BroadcastHashJoin Inner BuildLeft (94) - : :- BroadcastExchange (81) - : : +- BroadcastHashJoin LeftSemi BuildRight (80) - : : :- Filter (75) - : : : +- Scan parquet (74) - : : +- BroadcastExchange (79) - : : +- Project (78) - : : +- Filter (77) - : : +- Scan parquet (76) - : +- Filter (93) - : +- HashAggregate (92) - : +- Exchange (91) - : +- HashAggregate (90) - : +- BroadcastHashJoin LeftSemi BuildRight (89) - : :- Project (84) - : : +- Filter (83) - : : +- Scan parquet (82) - : +- BroadcastExchange (88) - : +- Project (87) - : +- Filter (86) - : +- Scan parquet (85) - +- BroadcastExchange (102) - +- Project (101) - +- Filter (100) - +- Scan parquet (99) + Sort (108) + +- Exchange (107) + +- Project (106) + +- BroadcastHashJoin Inner BuildRight (105) + :- Project (100) + : +- SortMergeJoin LeftSemi (99) + : :- Sort (74) + : : +- Exchange (73) + : : +- Filter (72) + : : +- Scan parquet (71) + : +- Sort (98) + : +- Exchange (97) + : +- Project (96) + : +- BroadcastHashJoin Inner BuildLeft (95) + : :- BroadcastExchange (82) + : : +- BroadcastHashJoin LeftSemi BuildRight (81) + : : :- Filter (76) + : : : +- Scan parquet (75) + : : +- BroadcastExchange (80) + : : +- Project (79) + : : +- Filter (78) + : : +- Scan parquet (77) + : +- Filter (94) + : +- HashAggregate (93) + : +- Exchange (92) + : +- HashAggregate (91) + : +- BroadcastHashJoin LeftSemi BuildRight (90) + : :- Project (85) + : : +- Filter (84) + : : +- Scan parquet (83) + : +- BroadcastExchange (89) + : +- Project (88) + : +- Filter (87) + : +- Scan parquet (86) + +- BroadcastExchange (104) + +- Project (103) + +- Filter (102) + +- Scan parquet (101) (1) Scan parquet @@ -401,164 +403,172 @@ Condition : isnotnull(s_nationkey#X) Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(74) Scan parquet +(74) Sort +Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(75) Scan parquet Output [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(ps_availqty), IsNotNull(ps_partkey), IsNotNull(ps_suppkey)] ReadSchema: struct -(75) Filter +(76) Filter Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Condition : ((isnotnull(ps_availqty#X) AND isnotnull(ps_partkey#X)) AND isnotnull(ps_suppkey#X)) -(76) Scan parquet +(77) Scan parquet Output [2]: [p_partkey#X, p_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_name), StringStartsWith(p_name,forest)] ReadSchema: struct -(77) Filter +(78) Filter Input [2]: [p_partkey#X, p_name#X] Condition : (isnotnull(p_name#X) AND StartsWith(p_name#X, forest)) -(78) Project +(79) Project Output [1]: [p_partkey#X] Input [2]: [p_partkey#X, p_name#X] -(79) BroadcastExchange +(80) BroadcastExchange Input [1]: [p_partkey#X] Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=X] -(80) BroadcastHashJoin +(81) BroadcastHashJoin Left keys [1]: [ps_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(81) BroadcastExchange +(82) BroadcastExchange Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false], input[1, bigint, false]),false), [plan_id=X] -(82) Scan parquet +(83) Scan parquet Output [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1994-01-01), LessThan(l_shipdate,1995-01-01), IsNotNull(l_partkey), IsNotNull(l_suppkey)] ReadSchema: struct -(83) Filter +(84) Filter Input [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] Condition : ((((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1994-01-01)) AND (l_shipdate#X < 1995-01-01)) AND isnotnull(l_partkey#X)) AND isnotnull(l_suppkey#X)) -(84) Project +(85) Project Output [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Input [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] -(85) Scan parquet +(86) Scan parquet Output [2]: [p_partkey#X, p_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_name), StringStartsWith(p_name,forest)] ReadSchema: struct -(86) Filter +(87) Filter Input [2]: [p_partkey#X, p_name#X] Condition : (isnotnull(p_name#X) AND StartsWith(p_name#X, forest)) -(87) Project +(88) Project Output [1]: [p_partkey#X] Input [2]: [p_partkey#X, p_name#X] -(88) BroadcastExchange +(89) BroadcastExchange Input [1]: [p_partkey#X] Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=X] -(89) BroadcastHashJoin +(90) BroadcastHashJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(90) HashAggregate +(91) HashAggregate Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Keys [2]: [l_partkey#X, l_suppkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] -(91) Exchange +(92) Exchange Input [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_partkey#X, l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(92) HashAggregate +(93) HashAggregate Input [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] Keys [2]: [l_partkey#X, l_suppkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [3]: [CheckOverflow((0.50 * promote_precision(sum(l_quantity#X)#X)), DecimalType(24,3)) AS (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(93) Filter +(94) Filter Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Condition : isnotnull((0.5 * sum(l_quantity))#X) -(94) BroadcastHashJoin +(95) BroadcastHashJoin Left keys [2]: [ps_partkey#X, ps_suppkey#X] Right keys [2]: [l_partkey#X, l_suppkey#X] Join condition: (cast(ps_availqty#X as decimal(24,3)) > (0.5 * sum(l_quantity))#X) -(95) Project +(96) Project Output [1]: [ps_suppkey#X] Input [6]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(96) Exchange +(97) Exchange Input [1]: [ps_suppkey#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(97) ShuffledHashJoin +(98) Sort +Input [1]: [ps_suppkey#X] +Arguments: [ps_suppkey#X ASC NULLS FIRST], false, 0 + +(99) SortMergeJoin Left keys [1]: [s_suppkey#X] Right keys [1]: [ps_suppkey#X] Join condition: None -(98) Project +(100) Project Output [3]: [s_name#X, s_address#X, s_nationkey#X] Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] -(99) Scan parquet +(101) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,CANADA), IsNotNull(n_nationkey)] ReadSchema: struct -(100) Filter +(102) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = CANADA)) AND isnotnull(n_nationkey#X)) -(101) Project +(103) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(102) BroadcastExchange +(104) BroadcastExchange Input [1]: [n_nationkey#X] Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=X] -(103) BroadcastHashJoin +(105) BroadcastHashJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(104) Project +(106) Project Output [2]: [s_name#X, s_address#X] Input [4]: [s_name#X, s_address#X, s_nationkey#X, n_nationkey#X] -(105) Exchange +(107) Exchange Input [2]: [s_name#X, s_address#X] Arguments: rangepartitioning(s_name#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(106) Sort +(108) Sort Input [2]: [s_name#X, s_address#X] Arguments: [s_name#X ASC NULLS FIRST], true, 0 -(107) AdaptiveSparkPlan +(109) AdaptiveSparkPlan Output [2]: [s_name#X, s_address#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/10.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/10.txt index 85176d8c6011..ec46bfd07b91 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/10.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/10.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (94) +AdaptiveSparkPlan (100) +- == Final Plan == VeloxColumnarToRowExec (67) +- TakeOrderedAndProjectExecTransformer (66) @@ -54,32 +54,38 @@ AdaptiveSparkPlan (94) +- ^ NoopFilter (45) +- ^ Scan parquet (44) +- == Initial Plan == - TakeOrderedAndProject (93) - +- HashAggregate (92) - +- Exchange (91) - +- HashAggregate (90) - +- Project (89) - +- ShuffledHashJoin Inner BuildRight (88) - :- Exchange (84) - : +- Project (83) - : +- ShuffledHashJoin Inner BuildRight (82) - : :- Exchange (77) - : : +- Project (76) - : : +- ShuffledHashJoin Inner BuildRight (75) - : : :- Exchange (70) - : : : +- Filter (69) - : : : +- Scan parquet (68) - : : +- Exchange (74) - : : +- Project (73) - : : +- Filter (72) - : : +- Scan parquet (71) - : +- Exchange (81) - : +- Project (80) - : +- Filter (79) - : +- Scan parquet (78) - +- Exchange (87) - +- Filter (86) - +- Scan parquet (85) + TakeOrderedAndProject (99) + +- HashAggregate (98) + +- Exchange (97) + +- HashAggregate (96) + +- Project (95) + +- SortMergeJoin Inner (94) + :- Sort (89) + : +- Exchange (88) + : +- Project (87) + : +- SortMergeJoin Inner (86) + : :- Sort (80) + : : +- Exchange (79) + : : +- Project (78) + : : +- SortMergeJoin Inner (77) + : : :- Sort (71) + : : : +- Exchange (70) + : : : +- Filter (69) + : : : +- Scan parquet (68) + : : +- Sort (76) + : : +- Exchange (75) + : : +- Project (74) + : : +- Filter (73) + : : +- Scan parquet (72) + : +- Sort (85) + : +- Exchange (84) + : +- Project (83) + : +- Filter (82) + : +- Scan parquet (81) + +- Sort (93) + +- Exchange (92) + +- Filter (91) + +- Scan parquet (90) (1) Scan parquet @@ -371,116 +377,140 @@ Condition : (isnotnull(c_custkey#X) AND isnotnull(c_nationkey#X)) Input [7]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(71) Scan parquet +(71) Sort +Input [7]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(72) Scan parquet Output [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderdate), GreaterThanOrEqual(o_orderdate,1993-10-01), LessThan(o_orderdate,1994-01-01), IsNotNull(o_custkey), IsNotNull(o_orderkey)] ReadSchema: struct -(72) Filter +(73) Filter Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Condition : ((((isnotnull(o_orderdate#X) AND (o_orderdate#X >= 1993-10-01)) AND (o_orderdate#X < 1994-01-01)) AND isnotnull(o_custkey#X)) AND isnotnull(o_orderkey#X)) -(73) Project +(74) Project Output [2]: [o_orderkey#X, o_custkey#X] Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] -(74) Exchange +(75) Exchange Input [2]: [o_orderkey#X, o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(75) ShuffledHashJoin +(76) Sort +Input [2]: [o_orderkey#X, o_custkey#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(77) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join condition: None -(76) Project +(78) Project Output [8]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X] Input [9]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X, o_custkey#X] -(77) Exchange +(79) Exchange Input [8]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(78) Scan parquet +(80) Sort +Input [8]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(81) Scan parquet Output [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_returnflag#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_returnflag), EqualTo(l_returnflag,R), IsNotNull(l_orderkey)] ReadSchema: struct -(79) Filter +(82) Filter Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_returnflag#X] Condition : ((isnotnull(l_returnflag#X) AND (l_returnflag#X = R)) AND isnotnull(l_orderkey#X)) -(80) Project +(83) Project Output [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_returnflag#X] -(81) Exchange +(84) Exchange Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(82) ShuffledHashJoin +(85) Sort +Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(86) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(83) Project +(87) Project Output [9]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X] Input [11]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X] -(84) Exchange +(88) Exchange Input [9]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(c_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(85) Scan parquet +(89) Sort +Input [9]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X] +Arguments: [c_nationkey#X ASC NULLS FIRST], false, 0 + +(90) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey)] ReadSchema: struct -(86) Filter +(91) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : isnotnull(n_nationkey#X) -(87) Exchange +(92) Exchange Input [2]: [n_nationkey#X, n_name#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(88) ShuffledHashJoin +(93) Sort +Input [2]: [n_nationkey#X, n_name#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(94) SortMergeJoin Left keys [1]: [c_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(89) Project +(95) Project Output [9]: [c_custkey#X, c_name#X, c_address#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X, n_name#X] Input [11]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X, n_nationkey#X, n_name#X] -(90) HashAggregate +(96) HashAggregate Input [9]: [c_custkey#X, c_name#X, c_address#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X, n_name#X] Keys [7]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [9]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, sum#X, isEmpty#X] -(91) Exchange +(97) Exchange Input [9]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, sum#X, isEmpty#X] Arguments: hashpartitioning(c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(92) HashAggregate +(98) HashAggregate Input [9]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, sum#X, isEmpty#X] Keys [7]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X] Results [8]: [c_custkey#X, c_name#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X AS revenue#X, c_acctbal#X, n_name#X, c_address#X, c_phone#X, c_comment#X] -(93) TakeOrderedAndProject +(99) TakeOrderedAndProject Input [8]: [c_custkey#X, c_name#X, revenue#X, c_acctbal#X, n_name#X, c_address#X, c_phone#X, c_comment#X] Arguments: X, [revenue#X DESC NULLS LAST], [c_custkey#X, c_name#X, revenue#X, c_acctbal#X, n_name#X, c_address#X, c_phone#X, c_comment#X] -(94) AdaptiveSparkPlan +(100) AdaptiveSparkPlan Output [8]: [c_custkey#X, c_name#X, revenue#X, c_acctbal#X, n_name#X, c_address#X, c_phone#X, c_comment#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/11.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/11.txt index 7c749c0a5ec6..cccf1408bea9 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/11.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/11.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (78) +AdaptiveSparkPlan (82) +- == Final Plan == VeloxColumnarToRowExec (56) +- ^ SortExecTransformer (54) @@ -45,27 +45,31 @@ AdaptiveSparkPlan (78) +- ^ NoopFilter (28) +- ^ Scan parquet (27) +- == Initial Plan == - Sort (77) - +- Exchange (76) - +- Filter (75) - +- HashAggregate (74) - +- Exchange (73) - +- HashAggregate (72) - +- Project (71) - +- ShuffledHashJoin Inner BuildRight (70) - :- Exchange (65) - : +- Project (64) - : +- ShuffledHashJoin Inner BuildRight (63) - : :- Exchange (59) - : : +- Filter (58) - : : +- Scan parquet (57) - : +- Exchange (62) - : +- Filter (61) - : +- Scan parquet (60) - +- Exchange (69) - +- Project (68) - +- Filter (67) - +- Scan parquet (66) + Sort (81) + +- Exchange (80) + +- Filter (79) + +- HashAggregate (78) + +- Exchange (77) + +- HashAggregate (76) + +- Project (75) + +- SortMergeJoin Inner (74) + :- Sort (68) + : +- Exchange (67) + : +- Project (66) + : +- SortMergeJoin Inner (65) + : :- Sort (60) + : : +- Exchange (59) + : : +- Filter (58) + : : +- Scan parquet (57) + : +- Sort (64) + : +- Exchange (63) + : +- Filter (62) + : +- Scan parquet (61) + +- Sort (73) + +- Exchange (72) + +- Project (71) + +- Filter (70) + +- Scan parquet (69) (1) Scan parquet @@ -311,92 +315,108 @@ Condition : isnotnull(ps_suppkey#X) Input [4]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(60) Scan parquet +(60) Sort +Input [4]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] +Arguments: [ps_suppkey#X ASC NULLS FIRST], false, 0 + +(61) Scan parquet Output [2]: [s_suppkey#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct -(61) Filter +(62) Filter Input [2]: [s_suppkey#X, s_nationkey#X] Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) -(62) Exchange +(63) Exchange Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(63) ShuffledHashJoin +(64) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(65) SortMergeJoin Left keys [1]: [ps_suppkey#X] Right keys [1]: [s_suppkey#X] Join condition: None -(64) Project +(66) Project Output [4]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] Input [6]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X, s_suppkey#X, s_nationkey#X] -(65) Exchange +(67) Exchange Input [4]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(66) Scan parquet +(68) Sort +Input [4]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(69) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,GERMANY), IsNotNull(n_nationkey)] ReadSchema: struct -(67) Filter +(70) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = GERMANY)) AND isnotnull(n_nationkey#X)) -(68) Project +(71) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(69) Exchange +(72) Exchange Input [1]: [n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(70) ShuffledHashJoin +(73) Sort +Input [1]: [n_nationkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(74) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(71) Project +(75) Project Output [3]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X] Input [5]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X, n_nationkey#X] -(72) HashAggregate +(76) HashAggregate Input [3]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X] Keys [1]: [ps_partkey#X] Functions [1]: [partial_sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(cast(ps_availqty#X as decimal(10,0)) as decimal(12,2)))), DecimalType(23,2), true))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [ps_partkey#X, sum#X, isEmpty#X] -(73) Exchange +(77) Exchange Input [3]: [ps_partkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(ps_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(74) HashAggregate +(78) HashAggregate Input [3]: [ps_partkey#X, sum#X, isEmpty#X] Keys [1]: [ps_partkey#X] Functions [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(cast(ps_availqty#X as decimal(10,0)) as decimal(12,2)))), DecimalType(23,2), true))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(cast(ps_availqty#X as decimal(10,0)) as decimal(12,2)))), DecimalType(23,2), true))#X] Results [2]: [ps_partkey#X, sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(cast(ps_availqty#X as decimal(10,0)) as decimal(12,2)))), DecimalType(23,2), true))#X AS value#X] -(75) Filter +(79) Filter Input [2]: [ps_partkey#X, value#X] Condition : (isnotnull(value#X) AND (cast(value#X as decimal(38,6)) > Subquery subquery#X, [id=#X])) -(76) Exchange +(80) Exchange Input [2]: [ps_partkey#X, value#X] Arguments: rangepartitioning(value#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(77) Sort +(81) Sort Input [2]: [ps_partkey#X, value#X] Arguments: [value#X DESC NULLS LAST], true, 0 -(78) AdaptiveSparkPlan +(82) AdaptiveSparkPlan Output [2]: [ps_partkey#X, value#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/12.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/12.txt index 5cf27c6e0cb9..17cdf62608cc 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/12.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/12.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (53) +AdaptiveSparkPlan (55) +- == Final Plan == VeloxColumnarToRowExec (38) +- ^ SortExecTransformer (36) @@ -31,20 +31,22 @@ AdaptiveSparkPlan (53) +- ^ NoopFilter (11) +- ^ Scan parquet (10) +- == Initial Plan == - Sort (52) - +- Exchange (51) - +- HashAggregate (50) - +- Exchange (49) - +- HashAggregate (48) - +- Project (47) - +- ShuffledHashJoin Inner BuildLeft (46) - :- Exchange (41) - : +- Filter (40) - : +- Scan parquet (39) - +- Exchange (45) - +- Project (44) - +- Filter (43) - +- Scan parquet (42) + Sort (54) + +- Exchange (53) + +- HashAggregate (52) + +- Exchange (51) + +- HashAggregate (50) + +- Project (49) + +- SortMergeJoin Inner (48) + :- Sort (42) + : +- Exchange (41) + : +- Filter (40) + : +- Scan parquet (39) + +- Sort (47) + +- Exchange (46) + +- Project (45) + +- Filter (44) + +- Scan parquet (43) (1) Scan parquet @@ -218,60 +220,68 @@ Condition : isnotnull(o_orderkey#X) Input [2]: [o_orderkey#X, o_orderpriority#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(42) Scan parquet +(42) Sort +Input [2]: [o_orderkey#X, o_orderpriority#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(43) Scan parquet Output [5]: [l_orderkey#X, l_shipdate#X, l_commitdate#X, l_receiptdate#X, l_shipmode#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_commitdate), IsNotNull(l_receiptdate), IsNotNull(l_shipdate), In(l_shipmode, [MAIL,SHIP]), GreaterThanOrEqual(l_receiptdate,1994-01-01), LessThan(l_receiptdate,1995-01-01), IsNotNull(l_orderkey)] ReadSchema: struct -(43) Filter +(44) Filter Input [5]: [l_orderkey#X, l_shipdate#X, l_commitdate#X, l_receiptdate#X, l_shipmode#X] Condition : ((((((((isnotnull(l_commitdate#X) AND isnotnull(l_receiptdate#X)) AND isnotnull(l_shipdate#X)) AND l_shipmode#X IN (MAIL,SHIP)) AND (l_commitdate#X < l_receiptdate#X)) AND (l_shipdate#X < l_commitdate#X)) AND (l_receiptdate#X >= 1994-01-01)) AND (l_receiptdate#X < 1995-01-01)) AND isnotnull(l_orderkey#X)) -(44) Project +(45) Project Output [2]: [l_orderkey#X, l_shipmode#X] Input [5]: [l_orderkey#X, l_shipdate#X, l_commitdate#X, l_receiptdate#X, l_shipmode#X] -(45) Exchange +(46) Exchange Input [2]: [l_orderkey#X, l_shipmode#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(46) ShuffledHashJoin +(47) Sort +Input [2]: [l_orderkey#X, l_shipmode#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(48) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(47) Project +(49) Project Output [2]: [o_orderpriority#X, l_shipmode#X] Input [4]: [o_orderkey#X, o_orderpriority#X, l_orderkey#X, l_shipmode#X] -(48) HashAggregate +(50) HashAggregate Input [2]: [o_orderpriority#X, l_shipmode#X] Keys [1]: [l_shipmode#X] Functions [2]: [partial_sum(CASE WHEN ((o_orderpriority#X = 1-URGENT) OR (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (NOT (o_orderpriority#X = 1-URGENT) AND NOT (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)] Aggregate Attributes [2]: [sum#X, sum#X] Results [3]: [l_shipmode#X, sum#X, sum#X] -(49) Exchange +(51) Exchange Input [3]: [l_shipmode#X, sum#X, sum#X] Arguments: hashpartitioning(l_shipmode#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(50) HashAggregate +(52) HashAggregate Input [3]: [l_shipmode#X, sum#X, sum#X] Keys [1]: [l_shipmode#X] Functions [2]: [sum(CASE WHEN ((o_orderpriority#X = 1-URGENT) OR (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END), sum(CASE WHEN (NOT (o_orderpriority#X = 1-URGENT) AND NOT (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)] Aggregate Attributes [2]: [sum(CASE WHEN ((o_orderpriority#X = 1-URGENT) OR (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)#X, sum(CASE WHEN (NOT (o_orderpriority#X = 1-URGENT) AND NOT (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)#X] Results [3]: [l_shipmode#X, sum(CASE WHEN ((o_orderpriority#X = 1-URGENT) OR (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)#X AS high_line_count#X, sum(CASE WHEN (NOT (o_orderpriority#X = 1-URGENT) AND NOT (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)#X AS low_line_count#X] -(51) Exchange +(53) Exchange Input [3]: [l_shipmode#X, high_line_count#X, low_line_count#X] Arguments: rangepartitioning(l_shipmode#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(52) Sort +(54) Sort Input [3]: [l_shipmode#X, high_line_count#X, low_line_count#X] Arguments: [l_shipmode#X ASC NULLS FIRST], true, 0 -(53) AdaptiveSparkPlan +(55) AdaptiveSparkPlan Output [3]: [l_shipmode#X, high_line_count#X, low_line_count#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/13.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/13.txt index d3904d8d079e..730f0e0a438a 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/13.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/13.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (56) +AdaptiveSparkPlan (58) +- == Final Plan == VeloxColumnarToRowExec (40) +- ^ SortExecTransformer (38) @@ -33,21 +33,23 @@ AdaptiveSparkPlan (56) +- ^ NoopFilter (10) +- ^ Scan parquet (9) +- == Initial Plan == - Sort (55) - +- Exchange (54) - +- HashAggregate (53) - +- Exchange (52) - +- HashAggregate (51) - +- HashAggregate (50) - +- HashAggregate (49) - +- Project (48) - +- ShuffledHashJoin LeftOuter BuildRight (47) - :- Exchange (42) - : +- Scan parquet (41) - +- Exchange (46) - +- Project (45) - +- Filter (44) - +- Scan parquet (43) + Sort (57) + +- Exchange (56) + +- HashAggregate (55) + +- Exchange (54) + +- HashAggregate (53) + +- HashAggregate (52) + +- HashAggregate (51) + +- Project (50) + +- SortMergeJoin LeftOuter (49) + :- Sort (43) + : +- Exchange (42) + : +- Scan parquet (41) + +- Sort (48) + +- Exchange (47) + +- Project (46) + +- Filter (45) + +- Scan parquet (44) (1) Scan parquet @@ -229,74 +231,82 @@ ReadSchema: struct Input [1]: [c_custkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(43) Scan parquet +(43) Sort +Input [1]: [c_custkey#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(44) Scan parquet Output [3]: [o_orderkey#X, o_custkey#X, o_comment#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_comment), IsNotNull(o_custkey)] ReadSchema: struct -(44) Filter +(45) Filter Input [3]: [o_orderkey#X, o_custkey#X, o_comment#X] Condition : ((isnotnull(o_comment#X) AND NOT o_comment#X LIKE %special%requests%) AND isnotnull(o_custkey#X)) -(45) Project +(46) Project Output [2]: [o_orderkey#X, o_custkey#X] Input [3]: [o_orderkey#X, o_custkey#X, o_comment#X] -(46) Exchange +(47) Exchange Input [2]: [o_orderkey#X, o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(47) ShuffledHashJoin +(48) Sort +Input [2]: [o_orderkey#X, o_custkey#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(49) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join condition: None -(48) Project +(50) Project Output [2]: [c_custkey#X, o_orderkey#X] Input [3]: [c_custkey#X, o_orderkey#X, o_custkey#X] -(49) HashAggregate +(51) HashAggregate Input [2]: [c_custkey#X, o_orderkey#X] Keys [1]: [c_custkey#X] Functions [1]: [partial_count(o_orderkey#X)] Aggregate Attributes [1]: [count#X] Results [2]: [c_custkey#X, count#X] -(50) HashAggregate +(52) HashAggregate Input [2]: [c_custkey#X, count#X] Keys [1]: [c_custkey#X] Functions [1]: [count(o_orderkey#X)] Aggregate Attributes [1]: [count(o_orderkey#X)#X] Results [1]: [count(o_orderkey#X)#X AS c_count#X] -(51) HashAggregate +(53) HashAggregate Input [1]: [c_count#X] Keys [1]: [c_count#X] Functions [1]: [partial_count(1)] Aggregate Attributes [1]: [count#X] Results [2]: [c_count#X, count#X] -(52) Exchange +(54) Exchange Input [2]: [c_count#X, count#X] Arguments: hashpartitioning(c_count#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(53) HashAggregate +(55) HashAggregate Input [2]: [c_count#X, count#X] Keys [1]: [c_count#X] Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#X] Results [2]: [c_count#X, count(1)#X AS custdist#X] -(54) Exchange +(56) Exchange Input [2]: [c_count#X, custdist#X] Arguments: rangepartitioning(custdist#X DESC NULLS LAST, c_count#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(55) Sort +(57) Sort Input [2]: [c_count#X, custdist#X] Arguments: [custdist#X DESC NULLS LAST, c_count#X DESC NULLS LAST], true, 0 -(56) AdaptiveSparkPlan +(58) AdaptiveSparkPlan Output [2]: [c_count#X, custdist#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/14.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/14.txt index 00b5fb4142f3..55111a31f874 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/14.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/14.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (37) +AdaptiveSparkPlan (39) +- == Final Plan == VeloxColumnarToRowExec (25) +- ^ ProjectExecTransformer (23) @@ -22,17 +22,19 @@ AdaptiveSparkPlan (37) +- ^ NoopFilter (11) +- ^ Scan parquet (10) +- == Initial Plan == - HashAggregate (36) - +- HashAggregate (35) - +- Project (34) - +- ShuffledHashJoin Inner BuildRight (33) - :- Exchange (29) - : +- Project (28) - : +- Filter (27) - : +- Scan parquet (26) - +- Exchange (32) - +- Filter (31) - +- Scan parquet (30) + HashAggregate (38) + +- HashAggregate (37) + +- Project (36) + +- SortMergeJoin Inner (35) + :- Sort (30) + : +- Exchange (29) + : +- Project (28) + : +- Filter (27) + : +- Scan parquet (26) + +- Sort (34) + +- Exchange (33) + +- Filter (32) + +- Scan parquet (31) (1) Scan parquet @@ -162,44 +164,52 @@ Input [4]: [l_partkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Input [3]: [l_partkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(30) Scan parquet +(30) Sort +Input [3]: [l_partkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(31) Scan parquet Output [2]: [p_partkey#X, p_type#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_partkey)] ReadSchema: struct -(31) Filter +(32) Filter Input [2]: [p_partkey#X, p_type#X] Condition : isnotnull(p_partkey#X) -(32) Exchange +(33) Exchange Input [2]: [p_partkey#X, p_type#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(33) ShuffledHashJoin +(34) Sort +Input [2]: [p_partkey#X, p_type#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(35) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(34) Project +(36) Project Output [3]: [l_extendedprice#X, l_discount#X, p_type#X] Input [5]: [l_partkey#X, l_extendedprice#X, l_discount#X, p_partkey#X, p_type#X] -(35) HashAggregate +(37) HashAggregate Input [3]: [l_extendedprice#X, l_discount#X, p_type#X] Keys: [] Functions [2]: [partial_sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true) ELSE 0.0000 END), partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] Aggregate Attributes [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] Results [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] -(36) HashAggregate +(38) HashAggregate Input [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] Keys: [] Functions [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true) ELSE 0.0000 END), sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] Aggregate Attributes [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true) ELSE 0.0000 END)#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X] Results [1]: [CheckOverflow((promote_precision(CheckOverflow((100.0000 * promote_precision(sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true) ELSE 0.0000 END)#X)), DecimalType(38,6), true)) / promote_precision(cast(sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X as decimal(38,6)))), DecimalType(38,6), true) AS promo_revenue#X] -(37) AdaptiveSparkPlan +(39) AdaptiveSparkPlan Output [1]: [promo_revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/15.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/15.txt index eab0e2908a10..db2df6c87544 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/15.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/15.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (48) +AdaptiveSparkPlan (50) +- == Final Plan == VeloxColumnarToRowExec (33) +- ^ SortExecTransformer (31) @@ -28,20 +28,22 @@ AdaptiveSparkPlan (48) +- ^ NoopFilter (11) +- ^ Scan parquet (10) +- == Initial Plan == - Sort (47) - +- Exchange (46) - +- Project (45) - +- ShuffledHashJoin Inner BuildLeft (44) - :- Exchange (36) - : +- Filter (35) - : +- Scan parquet (34) - +- Filter (43) - +- HashAggregate (42) - +- Exchange (41) - +- HashAggregate (40) - +- Project (39) - +- Filter (38) - +- Scan parquet (37) + Sort (49) + +- Exchange (48) + +- Project (47) + +- SortMergeJoin Inner (46) + :- Sort (37) + : +- Exchange (36) + : +- Filter (35) + : +- Scan parquet (34) + +- Sort (45) + +- Filter (44) + +- HashAggregate (43) + +- Exchange (42) + +- HashAggregate (41) + +- Project (40) + +- Filter (39) + +- Scan parquet (38) (1) Scan parquet @@ -197,60 +199,68 @@ Condition : isnotnull(s_suppkey#X) Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(37) Scan parquet +(37) Sort +Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(38) Scan parquet Output [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1996-01-01), LessThan(l_shipdate,1996-04-01), IsNotNull(l_suppkey)] ReadSchema: struct -(38) Filter +(39) Filter Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : (((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1996-01-01)) AND (l_shipdate#X < 1996-04-01)) AND isnotnull(l_suppkey#X)) -(39) Project +(40) Project Output [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(40) HashAggregate +(41) HashAggregate Input [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] Keys [1]: [l_suppkey#X] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_suppkey#X, sum#X, isEmpty#X] -(41) Exchange +(42) Exchange Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(42) HashAggregate +(43) HashAggregate Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Keys [1]: [l_suppkey#X] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X] Results [2]: [l_suppkey#X AS supplier_no#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X AS total_revenue#X] -(43) Filter +(44) Filter Input [2]: [supplier_no#X, total_revenue#X] Condition : (isnotnull(total_revenue#X) AND (total_revenue#X = Subquery subquery#X, [id=#X])) -(44) ShuffledHashJoin +(45) Sort +Input [2]: [supplier_no#X, total_revenue#X] +Arguments: [supplier_no#X ASC NULLS FIRST], false, 0 + +(46) SortMergeJoin Left keys [1]: [s_suppkey#X] Right keys [1]: [supplier_no#X] Join condition: None -(45) Project +(47) Project Output [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] Input [6]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, supplier_no#X, total_revenue#X] -(46) Exchange +(48) Exchange Input [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] Arguments: rangepartitioning(s_suppkey#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(47) Sort +(49) Sort Input [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] Arguments: [s_suppkey#X ASC NULLS FIRST], true, 0 -(48) AdaptiveSparkPlan +(50) AdaptiveSparkPlan Output [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/16.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/16.txt index 354bd4f3fabd..2eb5668906ba 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/16.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/16.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (69) +AdaptiveSparkPlan (71) +- == Final Plan == VeloxColumnarToRowExec (47) +- ^ SortExecTransformer (45) @@ -38,27 +38,29 @@ AdaptiveSparkPlan (69) +- ^ NoopFilter (11) +- ^ Scan parquet (10) +- == Initial Plan == - Sort (68) - +- Exchange (67) - +- HashAggregate (66) - +- Exchange (65) - +- HashAggregate (64) - +- HashAggregate (63) - +- Exchange (62) - +- HashAggregate (61) - +- Project (60) - +- ShuffledHashJoin Inner BuildRight (59) - :- Exchange (55) - : +- BroadcastHashJoin LeftAnti BuildRight (54) - : :- Filter (49) - : : +- Scan parquet (48) - : +- BroadcastExchange (53) - : +- Project (52) - : +- Filter (51) - : +- Scan parquet (50) - +- Exchange (58) - +- Filter (57) - +- Scan parquet (56) + Sort (70) + +- Exchange (69) + +- HashAggregate (68) + +- Exchange (67) + +- HashAggregate (66) + +- HashAggregate (65) + +- Exchange (64) + +- HashAggregate (63) + +- Project (62) + +- SortMergeJoin Inner (61) + :- Sort (56) + : +- Exchange (55) + : +- BroadcastHashJoin LeftAnti BuildRight (54) + : :- Filter (49) + : : +- Scan parquet (48) + : +- BroadcastExchange (53) + : +- Project (52) + : +- Filter (51) + : +- Scan parquet (50) + +- Sort (60) + +- Exchange (59) + +- Filter (58) + +- Scan parquet (57) (1) Scan parquet @@ -296,74 +298,82 @@ Join condition: None Input [2]: [ps_partkey#X, ps_suppkey#X] Arguments: hashpartitioning(ps_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(56) Scan parquet +(56) Sort +Input [2]: [ps_partkey#X, ps_suppkey#X] +Arguments: [ps_partkey#X ASC NULLS FIRST], false, 0 + +(57) Scan parquet Output [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_brand), IsNotNull(p_type), Not(EqualTo(p_brand,Brand#X)), Not(StringStartsWith(p_type,MEDIUM POLISHED)), In(p_size, [14,19,23,3,36,45,49,9]), IsNotNull(p_partkey)] ReadSchema: struct -(57) Filter +(58) Filter Input [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] Condition : (((((isnotnull(p_brand#X) AND isnotnull(p_type#X)) AND NOT (p_brand#X = Brand#X)) AND NOT StartsWith(p_type#X, MEDIUM POLISHED)) AND p_size#X IN (49,14,23,45,19,3,36,9)) AND isnotnull(p_partkey#X)) -(58) Exchange +(59) Exchange Input [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(59) ShuffledHashJoin +(60) Sort +Input [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(61) SortMergeJoin Left keys [1]: [ps_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(60) Project +(62) Project Output [4]: [ps_suppkey#X, p_brand#X, p_type#X, p_size#X] Input [6]: [ps_partkey#X, ps_suppkey#X, p_partkey#X, p_brand#X, p_type#X, p_size#X] -(61) HashAggregate +(63) HashAggregate Input [4]: [ps_suppkey#X, p_brand#X, p_type#X, p_size#X] Keys [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] Functions: [] Aggregate Attributes: [] Results [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] -(62) Exchange +(64) Exchange Input [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] Arguments: hashpartitioning(p_brand#X, p_type#X, p_size#X, ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(63) HashAggregate +(65) HashAggregate Input [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] Keys [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] Functions: [] Aggregate Attributes: [] Results [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] -(64) HashAggregate +(66) HashAggregate Input [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] Keys [3]: [p_brand#X, p_type#X, p_size#X] Functions [1]: [partial_count(distinct ps_suppkey#X)] Aggregate Attributes [1]: [count(ps_suppkey#X)#X] Results [4]: [p_brand#X, p_type#X, p_size#X, count#X] -(65) Exchange +(67) Exchange Input [4]: [p_brand#X, p_type#X, p_size#X, count#X] Arguments: hashpartitioning(p_brand#X, p_type#X, p_size#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(66) HashAggregate +(68) HashAggregate Input [4]: [p_brand#X, p_type#X, p_size#X, count#X] Keys [3]: [p_brand#X, p_type#X, p_size#X] Functions [1]: [count(distinct ps_suppkey#X)] Aggregate Attributes [1]: [count(ps_suppkey#X)#X] Results [4]: [p_brand#X, p_type#X, p_size#X, count(ps_suppkey#X)#X AS supplier_cnt#X] -(67) Exchange +(69) Exchange Input [4]: [p_brand#X, p_type#X, p_size#X, supplier_cnt#X] Arguments: rangepartitioning(supplier_cnt#X DESC NULLS LAST, p_brand#X ASC NULLS FIRST, p_type#X ASC NULLS FIRST, p_size#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(68) Sort +(70) Sort Input [4]: [p_brand#X, p_type#X, p_size#X, supplier_cnt#X] Arguments: [supplier_cnt#X DESC NULLS LAST, p_brand#X ASC NULLS FIRST, p_type#X ASC NULLS FIRST, p_size#X ASC NULLS FIRST], true, 0 -(69) AdaptiveSparkPlan +(71) AdaptiveSparkPlan Output [4]: [p_brand#X, p_type#X, p_size#X, supplier_cnt#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/17.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/17.txt index 848d4e2ce4f8..5226aacff753 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/17.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/17.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (60) +AdaptiveSparkPlan (63) +- == Final Plan == VeloxColumnarToRowExec (40) +- ^ ProjectExecTransformer (38) @@ -35,25 +35,28 @@ AdaptiveSparkPlan (60) +- ^ NoopFilter (22) +- ^ Scan parquet (21) +- == Initial Plan == - HashAggregate (59) - +- HashAggregate (58) - +- Project (57) - +- ShuffledHashJoin Inner BuildRight (56) - :- Project (49) - : +- ShuffledHashJoin Inner BuildRight (48) - : :- Exchange (43) - : : +- Filter (42) - : : +- Scan parquet (41) - : +- Exchange (47) - : +- Project (46) - : +- Filter (45) - : +- Scan parquet (44) - +- Filter (55) - +- HashAggregate (54) - +- Exchange (53) - +- HashAggregate (52) - +- Filter (51) - +- Scan parquet (50) + HashAggregate (62) + +- HashAggregate (61) + +- Project (60) + +- SortMergeJoin Inner (59) + :- Project (51) + : +- SortMergeJoin Inner (50) + : :- Sort (44) + : : +- Exchange (43) + : : +- Filter (42) + : : +- Scan parquet (41) + : +- Sort (49) + : +- Exchange (48) + : +- Project (47) + : +- Filter (46) + : +- Scan parquet (45) + +- Sort (58) + +- Filter (57) + +- HashAggregate (56) + +- Exchange (55) + +- HashAggregate (54) + +- Filter (53) + +- Scan parquet (52) (1) Scan parquet @@ -247,90 +250,102 @@ Condition : (isnotnull(l_partkey#X) AND isnotnull(l_quantity#X)) Input [3]: [l_partkey#X, l_quantity#X, l_extendedprice#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(44) Scan parquet +(44) Sort +Input [3]: [l_partkey#X, l_quantity#X, l_extendedprice#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(45) Scan parquet Output [3]: [p_partkey#X, p_brand#X, p_container#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_brand), IsNotNull(p_container), EqualTo(p_brand,Brand#X), EqualTo(p_container,MED BOX), IsNotNull(p_partkey)] ReadSchema: struct -(45) Filter +(46) Filter Input [3]: [p_partkey#X, p_brand#X, p_container#X] Condition : ((((isnotnull(p_brand#X) AND isnotnull(p_container#X)) AND (p_brand#X = Brand#X)) AND (p_container#X = MED BOX)) AND isnotnull(p_partkey#X)) -(46) Project +(47) Project Output [1]: [p_partkey#X] Input [3]: [p_partkey#X, p_brand#X, p_container#X] -(47) Exchange +(48) Exchange Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(48) ShuffledHashJoin +(49) Sort +Input [1]: [p_partkey#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(50) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(49) Project +(51) Project Output [3]: [l_quantity#X, l_extendedprice#X, p_partkey#X] Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, p_partkey#X] -(50) Scan parquet +(52) Scan parquet Output [2]: [l_partkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_partkey)] ReadSchema: struct -(51) Filter +(53) Filter Input [2]: [l_partkey#X, l_quantity#X] Condition : isnotnull(l_partkey#X) -(52) HashAggregate +(54) HashAggregate Input [2]: [l_partkey#X, l_quantity#X] Keys [1]: [l_partkey#X] Functions [1]: [partial_avg(l_quantity#X)] Aggregate Attributes [2]: [sum#X, count#X] Results [3]: [l_partkey#X, sum#X, count#X] -(53) Exchange +(55) Exchange Input [3]: [l_partkey#X, sum#X, count#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(54) HashAggregate +(56) HashAggregate Input [3]: [l_partkey#X, sum#X, count#X] Keys [1]: [l_partkey#X] Functions [1]: [avg(l_quantity#X)] Aggregate Attributes [1]: [avg(l_quantity#X)#X] Results [2]: [CheckOverflow((0.200000 * promote_precision(avg(l_quantity#X)#X)), DecimalType(18,7), true) AS (0.2 * avg(l_quantity))#X, l_partkey#X] -(55) Filter +(57) Filter Input [2]: [(0.2 * avg(l_quantity))#X, l_partkey#X] Condition : isnotnull((0.2 * avg(l_quantity))#X) -(56) ShuffledHashJoin +(58) Sort +Input [2]: [(0.2 * avg(l_quantity))#X, l_partkey#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(59) SortMergeJoin Left keys [1]: [p_partkey#X] Right keys [1]: [l_partkey#X] Join condition: (cast(l_quantity#X as decimal(18,7)) < (0.2 * avg(l_quantity))#X) -(57) Project +(60) Project Output [1]: [l_extendedprice#X] Input [5]: [l_quantity#X, l_extendedprice#X, p_partkey#X, (0.2 * avg(l_quantity))#X, l_partkey#X] -(58) HashAggregate +(61) HashAggregate Input [1]: [l_extendedprice#X] Keys: [] Functions [1]: [partial_sum(l_extendedprice#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [2]: [sum#X, isEmpty#X] -(59) HashAggregate +(62) HashAggregate Input [2]: [sum#X, isEmpty#X] Keys: [] Functions [1]: [sum(l_extendedprice#X)] Aggregate Attributes [1]: [sum(l_extendedprice#X)#X] Results [1]: [CheckOverflow((promote_precision(sum(l_extendedprice#X)#X) / 7.00), DecimalType(27,6), true) AS avg_yearly#X] -(60) AdaptiveSparkPlan +(63) AdaptiveSparkPlan Output [1]: [avg_yearly#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/18.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/18.txt index 3d4743403809..c1287b2d685a 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/18.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/18.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (103) +AdaptiveSparkPlan (110) +- == Final Plan == VeloxColumnarToRowExec (70) +- TakeOrderedAndProjectExecTransformer (69) @@ -58,38 +58,45 @@ AdaptiveSparkPlan (103) +- ShuffleQueryStage (57) +- ReusedExchange (56) +- == Initial Plan == - TakeOrderedAndProject (102) - +- HashAggregate (101) - +- HashAggregate (100) - +- Project (99) - +- ShuffledHashJoin Inner BuildRight (98) - :- Exchange (87) - : +- Project (86) - : +- ShuffledHashJoin Inner BuildLeft (85) - : :- Exchange (73) - : : +- Filter (72) - : : +- Scan parquet (71) - : +- Exchange (84) - : +- ShuffledHashJoin LeftSemi BuildRight (83) - : :- Exchange (76) - : : +- Filter (75) - : : +- Scan parquet (74) - : +- Project (82) - : +- Filter (81) - : +- HashAggregate (80) - : +- Exchange (79) - : +- HashAggregate (78) - : +- Scan parquet (77) - +- ShuffledHashJoin LeftSemi BuildRight (97) - :- Exchange (90) - : +- Filter (89) - : +- Scan parquet (88) - +- Project (96) - +- Filter (95) - +- HashAggregate (94) - +- Exchange (93) - +- HashAggregate (92) - +- Scan parquet (91) + TakeOrderedAndProject (109) + +- HashAggregate (108) + +- HashAggregate (107) + +- Project (106) + +- SortMergeJoin Inner (105) + :- Sort (92) + : +- Exchange (91) + : +- Project (90) + : +- SortMergeJoin Inner (89) + : :- Sort (74) + : : +- Exchange (73) + : : +- Filter (72) + : : +- Scan parquet (71) + : +- Sort (88) + : +- Exchange (87) + : +- SortMergeJoin LeftSemi (86) + : :- Sort (78) + : : +- Exchange (77) + : : +- Filter (76) + : : +- Scan parquet (75) + : +- Sort (85) + : +- Project (84) + : +- Filter (83) + : +- HashAggregate (82) + : +- Exchange (81) + : +- HashAggregate (80) + : +- Scan parquet (79) + +- SortMergeJoin LeftSemi (104) + :- Sort (96) + : +- Exchange (95) + : +- Filter (94) + : +- Scan parquet (93) + +- Sort (103) + +- Project (102) + +- Filter (101) + +- HashAggregate (100) + +- Exchange (99) + +- HashAggregate (98) + +- Scan parquet (97) (1) Scan parquet @@ -401,154 +408,182 @@ Condition : isnotnull(c_custkey#X) Input [2]: [c_custkey#X, c_name#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(74) Scan parquet +(74) Sort +Input [2]: [c_custkey#X, c_name#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(75) Scan parquet Output [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_custkey), IsNotNull(o_orderkey)] ReadSchema: struct -(75) Filter +(76) Filter Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Condition : (isnotnull(o_custkey#X) AND isnotnull(o_orderkey#X)) -(76) Exchange +(77) Exchange Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(77) Scan parquet +(78) Sort +Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(79) Scan parquet Output [2]: [l_orderkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(78) HashAggregate +(80) HashAggregate Input [2]: [l_orderkey#X, l_quantity#X] Keys [1]: [l_orderkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_orderkey#X, sum#X, isEmpty#X] -(79) Exchange +(81) Exchange Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(80) HashAggregate +(82) HashAggregate Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Keys [1]: [l_orderkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [2]: [l_orderkey#X, sum(l_quantity#X)#X AS sum(l_quantity#X)#X] -(81) Filter +(83) Filter Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] Condition : (isnotnull(sum(l_quantity#X)#X) AND (sum(l_quantity#X)#X > 300.00)) -(82) Project +(84) Project Output [1]: [l_orderkey#X] Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] -(83) ShuffledHashJoin +(85) Sort +Input [1]: [l_orderkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(86) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(84) Exchange +(87) Exchange Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(85) ShuffledHashJoin +(88) Sort +Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(89) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join condition: None -(86) Project +(90) Project Output [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] Input [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] -(87) Exchange +(91) Exchange Input [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(88) Scan parquet +(92) Sort +Input [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(93) Scan parquet Output [2]: [l_orderkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_orderkey)] ReadSchema: struct -(89) Filter +(94) Filter Input [2]: [l_orderkey#X, l_quantity#X] Condition : isnotnull(l_orderkey#X) -(90) Exchange +(95) Exchange Input [2]: [l_orderkey#X, l_quantity#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(91) Scan parquet +(96) Sort +Input [2]: [l_orderkey#X, l_quantity#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(97) Scan parquet Output [2]: [l_orderkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(92) HashAggregate +(98) HashAggregate Input [2]: [l_orderkey#X, l_quantity#X] Keys [1]: [l_orderkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_orderkey#X, sum#X, isEmpty#X] -(93) Exchange +(99) Exchange Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(94) HashAggregate +(100) HashAggregate Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Keys [1]: [l_orderkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [2]: [l_orderkey#X, sum(l_quantity#X)#X AS sum(l_quantity#X)#X] -(95) Filter +(101) Filter Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] Condition : (isnotnull(sum(l_quantity#X)#X) AND (sum(l_quantity#X)#X > 300.00)) -(96) Project +(102) Project Output [1]: [l_orderkey#X] Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] -(97) ShuffledHashJoin +(103) Sort +Input [1]: [l_orderkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(104) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(98) ShuffledHashJoin +(105) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(99) Project +(106) Project Output [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_quantity#X] Input [7]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_orderkey#X, l_quantity#X] -(100) HashAggregate +(107) HashAggregate Input [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_quantity#X] Keys [5]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [7]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] -(101) HashAggregate +(108) HashAggregate Input [7]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] Keys [5]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity#X)#X AS sum(l_quantity)#X] -(102) TakeOrderedAndProject +(109) TakeOrderedAndProject Input [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] Arguments: X, [o_totalprice#X DESC NULLS LAST, o_orderdate#X ASC NULLS FIRST], [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] -(103) AdaptiveSparkPlan +(110) AdaptiveSparkPlan Output [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/19.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/19.txt index 6ec9ae965ee9..21e4f472f3b3 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/19.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/19.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (36) +AdaptiveSparkPlan (38) +- == Final Plan == VeloxColumnarToRowExec (24) +- ^ RegularHashAggregateExecTransformer (22) @@ -21,17 +21,19 @@ AdaptiveSparkPlan (36) +- ^ NoopFilter (11) +- ^ Scan parquet (10) +- == Initial Plan == - HashAggregate (35) - +- HashAggregate (34) - +- Project (33) - +- ShuffledHashJoin Inner BuildRight (32) - :- Exchange (28) - : +- Project (27) - : +- Filter (26) - : +- Scan parquet (25) - +- Exchange (31) - +- Filter (30) - +- Scan parquet (29) + HashAggregate (37) + +- HashAggregate (36) + +- Project (35) + +- SortMergeJoin Inner (34) + :- Sort (29) + : +- Exchange (28) + : +- Project (27) + : +- Filter (26) + : +- Scan parquet (25) + +- Sort (33) + +- Exchange (32) + +- Filter (31) + +- Scan parquet (30) (1) Scan parquet @@ -157,44 +159,52 @@ Input [6]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, l_shipin Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(29) Scan parquet +(29) Sort +Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(30) Scan parquet Output [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_size), GreaterThanOrEqual(p_size,1), IsNotNull(p_partkey), Or(Or(And(And(EqualTo(p_brand,Brand#X),In(p_container, [SM BOX,SM CASE,SM PACK,SM PKG])),LessThanOrEqual(p_size,5)),And(And(EqualTo(p_brand,Brand#X),In(p_container, [MED BAG,MED BOX,MED PACK,MED PKG])),LessThanOrEqual(p_size,10))),And(And(EqualTo(p_brand,Brand#X),In(p_container, [LG BOX,LG CASE,LG PACK,LG PKG])),LessThanOrEqual(p_size,15)))] ReadSchema: struct -(30) Filter +(31) Filter Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Condition : (((isnotnull(p_size#X) AND (p_size#X >= 1)) AND isnotnull(p_partkey#X)) AND (((((p_brand#X = Brand#X) AND p_container#X IN (SM CASE,SM BOX,SM PACK,SM PKG)) AND (p_size#X <= 5)) OR (((p_brand#X = Brand#X) AND p_container#X IN (MED BAG,MED BOX,MED PKG,MED PACK)) AND (p_size#X <= 10))) OR (((p_brand#X = Brand#X) AND p_container#X IN (LG CASE,LG BOX,LG PACK,LG PKG)) AND (p_size#X <= 15)))) -(31) Exchange +(32) Exchange Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(32) ShuffledHashJoin +(33) Sort +Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(34) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join condition: (((((((p_brand#X = Brand#X) AND p_container#X IN (SM CASE,SM BOX,SM PACK,SM PKG)) AND (l_quantity#X >= 1.00)) AND (l_quantity#X <= 11.00)) AND (p_size#X <= 5)) OR (((((p_brand#X = Brand#X) AND p_container#X IN (MED BAG,MED BOX,MED PKG,MED PACK)) AND (l_quantity#X >= 10.00)) AND (l_quantity#X <= 20.00)) AND (p_size#X <= 10))) OR (((((p_brand#X = Brand#X) AND p_container#X IN (LG CASE,LG BOX,LG PACK,LG PKG)) AND (l_quantity#X >= 20.00)) AND (l_quantity#X <= 30.00)) AND (p_size#X <= 15))) -(33) Project +(35) Project Output [2]: [l_extendedprice#X, l_discount#X] Input [8]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, p_partkey#X, p_brand#X, p_size#X, p_container#X] -(34) HashAggregate +(36) HashAggregate Input [2]: [l_extendedprice#X, l_discount#X] Keys: [] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [2]: [sum#X, isEmpty#X] -(35) HashAggregate +(37) HashAggregate Input [2]: [sum#X, isEmpty#X] Keys: [] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X] Results [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X AS revenue#X] -(36) AdaptiveSparkPlan +(38) AdaptiveSparkPlan Output [1]: [revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/20.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/20.txt index 24be4842e1b8..1ac0992834eb 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/20.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/20.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (136) +AdaptiveSparkPlan (146) +- == Final Plan == VeloxColumnarToRowExec (96) +- ^ SortExecTransformer (94) @@ -76,45 +76,55 @@ AdaptiveSparkPlan (136) +- ^ NoopFilter (78) +- ^ Scan parquet (77) +- == Initial Plan == - Sort (135) - +- Exchange (134) - +- Project (133) - +- ShuffledHashJoin Inner BuildRight (132) - :- Exchange (127) - : +- Project (126) - : +- ShuffledHashJoin LeftSemi BuildRight (125) - : :- Exchange (99) - : : +- Filter (98) - : : +- Scan parquet (97) - : +- Exchange (124) - : +- Project (123) - : +- ShuffledHashJoin Inner BuildLeft (122) - : :- Exchange (108) - : : +- ShuffledHashJoin LeftSemi BuildRight (107) - : : :- Exchange (102) - : : : +- Filter (101) - : : : +- Scan parquet (100) - : : +- Exchange (106) - : : +- Project (105) - : : +- Filter (104) - : : +- Scan parquet (103) - : +- Exchange (121) - : +- Filter (120) - : +- HashAggregate (119) - : +- HashAggregate (118) - : +- ShuffledHashJoin LeftSemi BuildRight (117) - : :- Exchange (112) - : : +- Project (111) - : : +- Filter (110) - : : +- Scan parquet (109) - : +- Exchange (116) - : +- Project (115) - : +- Filter (114) - : +- Scan parquet (113) - +- Exchange (131) - +- Project (130) - +- Filter (129) - +- Scan parquet (128) + Sort (145) + +- Exchange (144) + +- Project (143) + +- SortMergeJoin Inner (142) + :- Sort (136) + : +- Exchange (135) + : +- Project (134) + : +- SortMergeJoin LeftSemi (133) + : :- Sort (100) + : : +- Exchange (99) + : : +- Filter (98) + : : +- Scan parquet (97) + : +- Sort (132) + : +- Exchange (131) + : +- Project (130) + : +- SortMergeJoin Inner (129) + : :- Sort (112) + : : +- Exchange (111) + : : +- SortMergeJoin LeftSemi (110) + : : :- Sort (104) + : : : +- Exchange (103) + : : : +- Filter (102) + : : : +- Scan parquet (101) + : : +- Sort (109) + : : +- Exchange (108) + : : +- Project (107) + : : +- Filter (106) + : : +- Scan parquet (105) + : +- Sort (128) + : +- Exchange (127) + : +- Filter (126) + : +- HashAggregate (125) + : +- HashAggregate (124) + : +- SortMergeJoin LeftSemi (123) + : :- Sort (117) + : : +- Exchange (116) + : : +- Project (115) + : : +- Filter (114) + : : +- Scan parquet (113) + : +- Sort (122) + : +- Exchange (121) + : +- Project (120) + : +- Filter (119) + : +- Scan parquet (118) + +- Sort (141) + +- Exchange (140) + +- Project (139) + +- Filter (138) + +- Scan parquet (137) (1) Scan parquet @@ -518,176 +528,216 @@ Condition : isnotnull(s_nationkey#X) Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(100) Scan parquet +(100) Sort +Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(101) Scan parquet Output [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(ps_availqty), IsNotNull(ps_partkey), IsNotNull(ps_suppkey)] ReadSchema: struct -(101) Filter +(102) Filter Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Condition : ((isnotnull(ps_availqty#X) AND isnotnull(ps_partkey#X)) AND isnotnull(ps_suppkey#X)) -(102) Exchange +(103) Exchange Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Arguments: hashpartitioning(ps_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(103) Scan parquet +(104) Sort +Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] +Arguments: [ps_partkey#X ASC NULLS FIRST], false, 0 + +(105) Scan parquet Output [2]: [p_partkey#X, p_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_name), StringStartsWith(p_name,forest)] ReadSchema: struct -(104) Filter +(106) Filter Input [2]: [p_partkey#X, p_name#X] Condition : (isnotnull(p_name#X) AND StartsWith(p_name#X, forest)) -(105) Project +(107) Project Output [1]: [p_partkey#X] Input [2]: [p_partkey#X, p_name#X] -(106) Exchange +(108) Exchange Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(107) ShuffledHashJoin +(109) Sort +Input [1]: [p_partkey#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(110) SortMergeJoin Left keys [1]: [ps_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(108) Exchange +(111) Exchange Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Arguments: hashpartitioning(ps_partkey#X, ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(109) Scan parquet +(112) Sort +Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] +Arguments: [ps_partkey#X ASC NULLS FIRST, ps_suppkey#X ASC NULLS FIRST], false, 0 + +(113) Scan parquet Output [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1994-01-01), LessThan(l_shipdate,1995-01-01), IsNotNull(l_partkey), IsNotNull(l_suppkey)] ReadSchema: struct -(110) Filter +(114) Filter Input [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] Condition : ((((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1994-01-01)) AND (l_shipdate#X < 1995-01-01)) AND isnotnull(l_partkey#X)) AND isnotnull(l_suppkey#X)) -(111) Project +(115) Project Output [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Input [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] -(112) Exchange +(116) Exchange Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(113) Scan parquet +(117) Sort +Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(118) Scan parquet Output [2]: [p_partkey#X, p_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_name), StringStartsWith(p_name,forest)] ReadSchema: struct -(114) Filter +(119) Filter Input [2]: [p_partkey#X, p_name#X] Condition : (isnotnull(p_name#X) AND StartsWith(p_name#X, forest)) -(115) Project +(120) Project Output [1]: [p_partkey#X] Input [2]: [p_partkey#X, p_name#X] -(116) Exchange +(121) Exchange Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(117) ShuffledHashJoin +(122) Sort +Input [1]: [p_partkey#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(123) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(118) HashAggregate +(124) HashAggregate Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Keys [2]: [l_partkey#X, l_suppkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] -(119) HashAggregate +(125) HashAggregate Input [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] Keys [2]: [l_partkey#X, l_suppkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [3]: [CheckOverflow((0.50 * promote_precision(sum(l_quantity#X)#X)), DecimalType(24,3), true) AS (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(120) Filter +(126) Filter Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Condition : isnotnull((0.5 * sum(l_quantity))#X) -(121) Exchange +(127) Exchange Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Arguments: hashpartitioning(l_partkey#X, l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(122) ShuffledHashJoin +(128) Sort +Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] +Arguments: [l_partkey#X ASC NULLS FIRST, l_suppkey#X ASC NULLS FIRST], false, 0 + +(129) SortMergeJoin Left keys [2]: [ps_partkey#X, ps_suppkey#X] Right keys [2]: [l_partkey#X, l_suppkey#X] Join condition: (cast(cast(ps_availqty#X as decimal(10,0)) as decimal(24,3)) > (0.5 * sum(l_quantity))#X) -(123) Project +(130) Project Output [1]: [ps_suppkey#X] Input [6]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(124) Exchange +(131) Exchange Input [1]: [ps_suppkey#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(125) ShuffledHashJoin +(132) Sort +Input [1]: [ps_suppkey#X] +Arguments: [ps_suppkey#X ASC NULLS FIRST], false, 0 + +(133) SortMergeJoin Left keys [1]: [s_suppkey#X] Right keys [1]: [ps_suppkey#X] Join condition: None -(126) Project +(134) Project Output [3]: [s_name#X, s_address#X, s_nationkey#X] Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] -(127) Exchange +(135) Exchange Input [3]: [s_name#X, s_address#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(128) Scan parquet +(136) Sort +Input [3]: [s_name#X, s_address#X, s_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(137) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,CANADA), IsNotNull(n_nationkey)] ReadSchema: struct -(129) Filter +(138) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = CANADA)) AND isnotnull(n_nationkey#X)) -(130) Project +(139) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(131) Exchange +(140) Exchange Input [1]: [n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(132) ShuffledHashJoin +(141) Sort +Input [1]: [n_nationkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(142) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(133) Project +(143) Project Output [2]: [s_name#X, s_address#X] Input [4]: [s_name#X, s_address#X, s_nationkey#X, n_nationkey#X] -(134) Exchange +(144) Exchange Input [2]: [s_name#X, s_address#X] Arguments: rangepartitioning(s_name#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(135) Sort +(145) Sort Input [2]: [s_name#X, s_address#X] Arguments: [s_name#X ASC NULLS FIRST], true, 0 -(136) AdaptiveSparkPlan +(146) AdaptiveSparkPlan Output [2]: [s_name#X, s_address#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/21.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/21.txt index b8c363fce329..e2a72528c4ed 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/21.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/21.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (129) +AdaptiveSparkPlan (138) +- == Final Plan == VeloxColumnarToRowExec (92) +- TakeOrderedAndProjectExecTransformer (91) @@ -73,42 +73,51 @@ AdaptiveSparkPlan (129) +- ^ NoopFilter (71) +- ^ Scan parquet (70) +- == Initial Plan == - TakeOrderedAndProject (128) - +- HashAggregate (127) - +- Exchange (126) - +- HashAggregate (125) - +- Project (124) - +- ShuffledHashJoin Inner BuildRight (123) - :- Exchange (118) - : +- Project (117) - : +- ShuffledHashJoin Inner BuildRight (116) - : :- Exchange (111) - : : +- Project (110) - : : +- ShuffledHashJoin Inner BuildLeft (109) - : : :- Exchange (95) - : : : +- Filter (94) - : : : +- Scan parquet (93) - : : +- Exchange (108) - : : +- ShuffledHashJoin LeftAnti BuildRight (107) - : : :- ShuffledHashJoin LeftSemi BuildRight (102) - : : : :- Exchange (99) - : : : : +- Project (98) - : : : : +- Filter (97) - : : : : +- Scan parquet (96) - : : : +- Exchange (101) - : : : +- Scan parquet (100) - : : +- Exchange (106) - : : +- Project (105) - : : +- Filter (104) - : : +- Scan parquet (103) - : +- Exchange (115) - : +- Project (114) - : +- Filter (113) - : +- Scan parquet (112) - +- Exchange (122) - +- Project (121) - +- Filter (120) - +- Scan parquet (119) + TakeOrderedAndProject (137) + +- HashAggregate (136) + +- Exchange (135) + +- HashAggregate (134) + +- Project (133) + +- SortMergeJoin Inner (132) + :- Sort (126) + : +- Exchange (125) + : +- Project (124) + : +- SortMergeJoin Inner (123) + : :- Sort (117) + : : +- Exchange (116) + : : +- Project (115) + : : +- SortMergeJoin Inner (114) + : : :- Sort (96) + : : : +- Exchange (95) + : : : +- Filter (94) + : : : +- Scan parquet (93) + : : +- Sort (113) + : : +- Exchange (112) + : : +- SortMergeJoin LeftAnti (111) + : : :- SortMergeJoin LeftSemi (105) + : : : :- Sort (101) + : : : : +- Exchange (100) + : : : : +- Project (99) + : : : : +- Filter (98) + : : : : +- Scan parquet (97) + : : : +- Sort (104) + : : : +- Exchange (103) + : : : +- Scan parquet (102) + : : +- Sort (110) + : : +- Exchange (109) + : : +- Project (108) + : : +- Filter (107) + : : +- Scan parquet (106) + : +- Sort (122) + : +- Exchange (121) + : +- Project (120) + : +- Filter (119) + : +- Scan parquet (118) + +- Sort (131) + +- Exchange (130) + +- Project (129) + +- Filter (128) + +- Scan parquet (127) (1) Scan parquet @@ -501,163 +510,199 @@ Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) Input [3]: [s_suppkey#X, s_name#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(96) Scan parquet +(96) Sort +Input [3]: [s_suppkey#X, s_name#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(97) Scan parquet Output [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_receiptdate), IsNotNull(l_commitdate), IsNotNull(l_suppkey), IsNotNull(l_orderkey)] ReadSchema: struct -(97) Filter +(98) Filter Input [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] Condition : ((((isnotnull(l_receiptdate#X) AND isnotnull(l_commitdate#X)) AND (l_receiptdate#X > l_commitdate#X)) AND isnotnull(l_suppkey#X)) AND isnotnull(l_orderkey#X)) -(98) Project +(99) Project Output [2]: [l_orderkey#X, l_suppkey#X] Input [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] -(99) Exchange +(100) Exchange Input [2]: [l_orderkey#X, l_suppkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(100) Scan parquet +(101) Sort +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(102) Scan parquet Output [2]: [l_orderkey#X, l_suppkey#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(101) Exchange +(103) Exchange Input [2]: [l_orderkey#X, l_suppkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(102) ShuffledHashJoin +(104) Sort +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(105) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: NOT (l_suppkey#X = l_suppkey#X) -(103) Scan parquet +(106) Scan parquet Output [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_receiptdate), IsNotNull(l_commitdate)] ReadSchema: struct -(104) Filter +(107) Filter Input [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] Condition : ((isnotnull(l_receiptdate#X) AND isnotnull(l_commitdate#X)) AND (l_receiptdate#X > l_commitdate#X)) -(105) Project +(108) Project Output [2]: [l_orderkey#X, l_suppkey#X] Input [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] -(106) Exchange +(109) Exchange Input [2]: [l_orderkey#X, l_suppkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(107) ShuffledHashJoin +(110) Sort +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(111) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: NOT (l_suppkey#X = l_suppkey#X) -(108) Exchange +(112) Exchange Input [2]: [l_orderkey#X, l_suppkey#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(109) ShuffledHashJoin +(113) Sort +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: [l_suppkey#X ASC NULLS FIRST], false, 0 + +(114) SortMergeJoin Left keys [1]: [s_suppkey#X] Right keys [1]: [l_suppkey#X] Join condition: None -(110) Project +(115) Project Output [3]: [s_name#X, s_nationkey#X, l_orderkey#X] Input [5]: [s_suppkey#X, s_name#X, s_nationkey#X, l_orderkey#X, l_suppkey#X] -(111) Exchange +(116) Exchange Input [3]: [s_name#X, s_nationkey#X, l_orderkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(112) Scan parquet +(117) Sort +Input [3]: [s_name#X, s_nationkey#X, l_orderkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(118) Scan parquet Output [2]: [o_orderkey#X, o_orderstatus#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderstatus), EqualTo(o_orderstatus,F), IsNotNull(o_orderkey)] ReadSchema: struct -(113) Filter +(119) Filter Input [2]: [o_orderkey#X, o_orderstatus#X] Condition : ((isnotnull(o_orderstatus#X) AND (o_orderstatus#X = F)) AND isnotnull(o_orderkey#X)) -(114) Project +(120) Project Output [1]: [o_orderkey#X] Input [2]: [o_orderkey#X, o_orderstatus#X] -(115) Exchange +(121) Exchange Input [1]: [o_orderkey#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(116) ShuffledHashJoin +(122) Sort +Input [1]: [o_orderkey#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(123) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [o_orderkey#X] Join condition: None -(117) Project +(124) Project Output [2]: [s_name#X, s_nationkey#X] Input [4]: [s_name#X, s_nationkey#X, l_orderkey#X, o_orderkey#X] -(118) Exchange +(125) Exchange Input [2]: [s_name#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(119) Scan parquet +(126) Sort +Input [2]: [s_name#X, s_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(127) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,SAUDI ARABIA), IsNotNull(n_nationkey)] ReadSchema: struct -(120) Filter +(128) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = SAUDI ARABIA)) AND isnotnull(n_nationkey#X)) -(121) Project +(129) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(122) Exchange +(130) Exchange Input [1]: [n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(123) ShuffledHashJoin +(131) Sort +Input [1]: [n_nationkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(132) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(124) Project +(133) Project Output [1]: [s_name#X] Input [3]: [s_name#X, s_nationkey#X, n_nationkey#X] -(125) HashAggregate +(134) HashAggregate Input [1]: [s_name#X] Keys [1]: [s_name#X] Functions [1]: [partial_count(1)] Aggregate Attributes [1]: [count#X] Results [2]: [s_name#X, count#X] -(126) Exchange +(135) Exchange Input [2]: [s_name#X, count#X] Arguments: hashpartitioning(s_name#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(127) HashAggregate +(136) HashAggregate Input [2]: [s_name#X, count#X] Keys [1]: [s_name#X] Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#X] Results [2]: [s_name#X, count(1)#X AS numwait#X] -(128) TakeOrderedAndProject +(137) TakeOrderedAndProject Input [2]: [s_name#X, numwait#X] Arguments: X, [numwait#X DESC NULLS LAST, s_name#X ASC NULLS FIRST], [s_name#X, numwait#X] -(129) AdaptiveSparkPlan +(138) AdaptiveSparkPlan Output [2]: [s_name#X, numwait#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/22.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/22.txt index f336a73676ea..984abd470378 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/22.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/22.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (50) +AdaptiveSparkPlan (52) +- == Final Plan == VeloxColumnarToRowExec (37) +- ^ SortExecTransformer (35) @@ -30,18 +30,20 @@ AdaptiveSparkPlan (50) +- ^ ProjectExecTransformer (11) +- ^ Scan parquet (10) +- == Initial Plan == - Sort (49) - +- Exchange (48) - +- HashAggregate (47) - +- Exchange (46) - +- HashAggregate (45) - +- Project (44) - +- ShuffledHashJoin LeftAnti BuildRight (43) - :- Exchange (40) - : +- Filter (39) - : +- Scan parquet (38) - +- Exchange (42) - +- Scan parquet (41) + Sort (51) + +- Exchange (50) + +- HashAggregate (49) + +- Exchange (48) + +- HashAggregate (47) + +- Project (46) + +- SortMergeJoin LeftAnti (45) + :- Sort (41) + : +- Exchange (40) + : +- Filter (39) + : +- Scan parquet (38) + +- Sort (44) + +- Exchange (43) + +- Scan parquet (42) (1) Scan parquet @@ -210,51 +212,59 @@ Condition : ((isnotnull(c_acctbal#X) AND substring(c_phone#X, 1, 2) IN (13,31,23 Input [3]: [c_custkey#X, c_phone#X, c_acctbal#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(41) Scan parquet +(41) Sort +Input [3]: [c_custkey#X, c_phone#X, c_acctbal#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(42) Scan parquet Output [1]: [o_custkey#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(42) Exchange +(43) Exchange Input [1]: [o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(43) ShuffledHashJoin +(44) Sort +Input [1]: [o_custkey#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(45) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join condition: None -(44) Project +(46) Project Output [2]: [substring(c_phone#X, 1, 2) AS cntrycode#X, c_acctbal#X] Input [3]: [c_custkey#X, c_phone#X, c_acctbal#X] -(45) HashAggregate +(47) HashAggregate Input [2]: [cntrycode#X, c_acctbal#X] Keys [1]: [cntrycode#X] Functions [2]: [partial_count(1), partial_sum(c_acctbal#X)] Aggregate Attributes [3]: [count#X, sum#X, isEmpty#X] Results [4]: [cntrycode#X, count#X, sum#X, isEmpty#X] -(46) Exchange +(48) Exchange Input [4]: [cntrycode#X, count#X, sum#X, isEmpty#X] Arguments: hashpartitioning(cntrycode#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(47) HashAggregate +(49) HashAggregate Input [4]: [cntrycode#X, count#X, sum#X, isEmpty#X] Keys [1]: [cntrycode#X] Functions [2]: [count(1), sum(c_acctbal#X)] Aggregate Attributes [2]: [count(1)#X, sum(c_acctbal#X)#X] Results [3]: [cntrycode#X, count(1)#X AS numcust#X, sum(c_acctbal#X)#X AS totacctbal#X] -(48) Exchange +(50) Exchange Input [3]: [cntrycode#X, numcust#X, totacctbal#X] Arguments: rangepartitioning(cntrycode#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(49) Sort +(51) Sort Input [3]: [cntrycode#X, numcust#X, totacctbal#X] Arguments: [cntrycode#X ASC NULLS FIRST], true, 0 -(50) AdaptiveSparkPlan +(52) AdaptiveSparkPlan Output [3]: [cntrycode#X, numcust#X, totacctbal#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/3.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/3.txt index f188fa96b0d8..58484edaa685 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/3.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/3.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (63) +AdaptiveSparkPlan (67) +- == Final Plan == VeloxColumnarToRowExec (43) +- TakeOrderedAndProjectExecTransformer (42) @@ -36,25 +36,29 @@ AdaptiveSparkPlan (63) +- ^ NoopFilter (28) +- ^ Scan parquet (27) +- == Initial Plan == - TakeOrderedAndProject (62) - +- HashAggregate (61) - +- HashAggregate (60) - +- Project (59) - +- ShuffledHashJoin Inner BuildRight (58) - :- Exchange (53) - : +- Project (52) - : +- ShuffledHashJoin Inner BuildLeft (51) - : :- Exchange (47) - : : +- Project (46) - : : +- Filter (45) - : : +- Scan parquet (44) - : +- Exchange (50) - : +- Filter (49) - : +- Scan parquet (48) - +- Exchange (57) - +- Project (56) - +- Filter (55) - +- Scan parquet (54) + TakeOrderedAndProject (66) + +- HashAggregate (65) + +- HashAggregate (64) + +- Project (63) + +- SortMergeJoin Inner (62) + :- Sort (56) + : +- Exchange (55) + : +- Project (54) + : +- SortMergeJoin Inner (53) + : :- Sort (48) + : : +- Exchange (47) + : : +- Project (46) + : : +- Filter (45) + : : +- Scan parquet (44) + : +- Sort (52) + : +- Exchange (51) + : +- Filter (50) + : +- Scan parquet (49) + +- Sort (61) + +- Exchange (60) + +- Project (59) + +- Filter (58) + +- Scan parquet (57) (1) Scan parquet @@ -256,80 +260,96 @@ Input [2]: [c_custkey#X, c_mktsegment#X] Input [1]: [c_custkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(48) Scan parquet +(48) Sort +Input [1]: [c_custkey#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(49) Scan parquet Output [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderdate), LessThan(o_orderdate,1995-03-15), IsNotNull(o_custkey), IsNotNull(o_orderkey)] ReadSchema: struct -(49) Filter +(50) Filter Input [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Condition : (((isnotnull(o_orderdate#X) AND (o_orderdate#X < 1995-03-15)) AND isnotnull(o_custkey#X)) AND isnotnull(o_orderkey#X)) -(50) Exchange +(51) Exchange Input [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(51) ShuffledHashJoin +(52) Sort +Input [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(53) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join condition: None -(52) Project +(54) Project Output [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] Input [5]: [c_custkey#X, o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] -(53) Exchange +(55) Exchange Input [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(54) Scan parquet +(56) Sort +Input [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(57) Scan parquet Output [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThan(l_shipdate,1995-03-15), IsNotNull(l_orderkey)] ReadSchema: struct -(55) Filter +(58) Filter Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : ((isnotnull(l_shipdate#X) AND (l_shipdate#X > 1995-03-15)) AND isnotnull(l_orderkey#X)) -(56) Project +(59) Project Output [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(57) Exchange +(60) Exchange Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(58) ShuffledHashJoin +(61) Sort +Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(62) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(59) Project +(63) Project Output [5]: [o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] Input [6]: [o_orderkey#X, o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] -(60) HashAggregate +(64) HashAggregate Input [5]: [o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] Keys [3]: [l_orderkey#X, o_orderdate#X, o_shippriority#X] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [5]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] -(61) HashAggregate +(65) HashAggregate Input [5]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] Keys [3]: [l_orderkey#X, o_orderdate#X, o_shippriority#X] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X] Results [4]: [l_orderkey#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X AS revenue#X, o_orderdate#X, o_shippriority#X] -(62) TakeOrderedAndProject +(66) TakeOrderedAndProject Input [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] Arguments: X, [revenue#X DESC NULLS LAST, o_orderdate#X ASC NULLS FIRST], [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] -(63) AdaptiveSparkPlan +(67) AdaptiveSparkPlan Output [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/4.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/4.txt index 42a8fef3563f..cb7a3c3a0955 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/4.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/4.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (54) +AdaptiveSparkPlan (56) +- == Final Plan == VeloxColumnarToRowExec (38) +- ^ SortExecTransformer (36) @@ -31,21 +31,23 @@ AdaptiveSparkPlan (54) +- ^ NoopFilter (11) +- ^ Scan parquet (10) +- == Initial Plan == - Sort (53) - +- Exchange (52) - +- HashAggregate (51) - +- Exchange (50) - +- HashAggregate (49) - +- Project (48) - +- ShuffledHashJoin LeftSemi BuildRight (47) - :- Exchange (42) - : +- Project (41) - : +- Filter (40) - : +- Scan parquet (39) - +- Exchange (46) - +- Project (45) - +- Filter (44) - +- Scan parquet (43) + Sort (55) + +- Exchange (54) + +- HashAggregate (53) + +- Exchange (52) + +- HashAggregate (51) + +- Project (50) + +- SortMergeJoin LeftSemi (49) + :- Sort (43) + : +- Exchange (42) + : +- Project (41) + : +- Filter (40) + : +- Scan parquet (39) + +- Sort (48) + +- Exchange (47) + +- Project (46) + +- Filter (45) + +- Scan parquet (44) (1) Scan parquet @@ -223,60 +225,68 @@ Input [3]: [o_orderkey#X, o_orderdate#X, o_orderpriority#X] Input [2]: [o_orderkey#X, o_orderpriority#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(43) Scan parquet +(43) Sort +Input [2]: [o_orderkey#X, o_orderpriority#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(44) Scan parquet Output [3]: [l_orderkey#X, l_commitdate#X, l_receiptdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_commitdate), IsNotNull(l_receiptdate)] ReadSchema: struct -(44) Filter +(45) Filter Input [3]: [l_orderkey#X, l_commitdate#X, l_receiptdate#X] Condition : ((isnotnull(l_commitdate#X) AND isnotnull(l_receiptdate#X)) AND (l_commitdate#X < l_receiptdate#X)) -(45) Project +(46) Project Output [1]: [l_orderkey#X] Input [3]: [l_orderkey#X, l_commitdate#X, l_receiptdate#X] -(46) Exchange +(47) Exchange Input [1]: [l_orderkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(47) ShuffledHashJoin +(48) Sort +Input [1]: [l_orderkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(49) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(48) Project +(50) Project Output [1]: [o_orderpriority#X] Input [2]: [o_orderkey#X, o_orderpriority#X] -(49) HashAggregate +(51) HashAggregate Input [1]: [o_orderpriority#X] Keys [1]: [o_orderpriority#X] Functions [1]: [partial_count(1)] Aggregate Attributes [1]: [count#X] Results [2]: [o_orderpriority#X, count#X] -(50) Exchange +(52) Exchange Input [2]: [o_orderpriority#X, count#X] Arguments: hashpartitioning(o_orderpriority#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(51) HashAggregate +(53) HashAggregate Input [2]: [o_orderpriority#X, count#X] Keys [1]: [o_orderpriority#X] Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#X] Results [2]: [o_orderpriority#X, count(1)#X AS order_count#X] -(52) Exchange +(54) Exchange Input [2]: [o_orderpriority#X, order_count#X] Arguments: rangepartitioning(o_orderpriority#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(53) Sort +(55) Sort Input [2]: [o_orderpriority#X, order_count#X] Arguments: [o_orderpriority#X ASC NULLS FIRST], true, 0 -(54) AdaptiveSparkPlan +(56) AdaptiveSparkPlan Output [2]: [o_orderpriority#X, order_count#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/5.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/5.txt index 378085655899..930a5a0bf488 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/5.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/5.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (146) +AdaptiveSparkPlan (156) +- == Final Plan == VeloxColumnarToRowExec (106) +- ^ SortExecTransformer (104) @@ -83,45 +83,55 @@ AdaptiveSparkPlan (146) +- ^ NoopFilter (79) +- ^ Scan parquet (78) +- == Initial Plan == - Sort (145) - +- Exchange (144) - +- HashAggregate (143) - +- Exchange (142) - +- HashAggregate (141) - +- Project (140) - +- ShuffledHashJoin Inner BuildRight (139) - :- Exchange (134) - : +- Project (133) - : +- ShuffledHashJoin Inner BuildRight (132) - : :- Exchange (128) - : : +- Project (127) - : : +- ShuffledHashJoin Inner BuildRight (126) - : : :- Exchange (122) - : : : +- Project (121) - : : : +- ShuffledHashJoin Inner BuildRight (120) - : : : :- Exchange (116) - : : : : +- Project (115) - : : : : +- ShuffledHashJoin Inner BuildLeft (114) - : : : : :- Exchange (109) - : : : : : +- Filter (108) - : : : : : +- Scan parquet (107) - : : : : +- Exchange (113) - : : : : +- Project (112) - : : : : +- Filter (111) - : : : : +- Scan parquet (110) - : : : +- Exchange (119) - : : : +- Filter (118) - : : : +- Scan parquet (117) - : : +- Exchange (125) - : : +- Filter (124) - : : +- Scan parquet (123) - : +- Exchange (131) - : +- Filter (130) - : +- Scan parquet (129) - +- Exchange (138) - +- Project (137) - +- Filter (136) - +- Scan parquet (135) + Sort (155) + +- Exchange (154) + +- HashAggregate (153) + +- Exchange (152) + +- HashAggregate (151) + +- Project (150) + +- SortMergeJoin Inner (149) + :- Sort (143) + : +- Exchange (142) + : +- Project (141) + : +- SortMergeJoin Inner (140) + : :- Sort (135) + : : +- Exchange (134) + : : +- Project (133) + : : +- SortMergeJoin Inner (132) + : : :- Sort (127) + : : : +- Exchange (126) + : : : +- Project (125) + : : : +- SortMergeJoin Inner (124) + : : : :- Sort (119) + : : : : +- Exchange (118) + : : : : +- Project (117) + : : : : +- SortMergeJoin Inner (116) + : : : : :- Sort (110) + : : : : : +- Exchange (109) + : : : : : +- Filter (108) + : : : : : +- Scan parquet (107) + : : : : +- Sort (115) + : : : : +- Exchange (114) + : : : : +- Project (113) + : : : : +- Filter (112) + : : : : +- Scan parquet (111) + : : : +- Sort (123) + : : : +- Exchange (122) + : : : +- Filter (121) + : : : +- Scan parquet (120) + : : +- Sort (131) + : : +- Exchange (130) + : : +- Filter (129) + : : +- Scan parquet (128) + : +- Sort (139) + : +- Exchange (138) + : +- Filter (137) + : +- Scan parquet (136) + +- Sort (148) + +- Exchange (147) + +- Project (146) + +- Filter (145) + +- Scan parquet (144) (1) Scan parquet @@ -567,176 +577,216 @@ Condition : (isnotnull(c_custkey#X) AND isnotnull(c_nationkey#X)) Input [2]: [c_custkey#X, c_nationkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(110) Scan parquet +(110) Sort +Input [2]: [c_custkey#X, c_nationkey#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(111) Scan parquet Output [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderdate), GreaterThanOrEqual(o_orderdate,1994-01-01), LessThan(o_orderdate,1995-01-01), IsNotNull(o_custkey), IsNotNull(o_orderkey)] ReadSchema: struct -(111) Filter +(112) Filter Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Condition : ((((isnotnull(o_orderdate#X) AND (o_orderdate#X >= 1994-01-01)) AND (o_orderdate#X < 1995-01-01)) AND isnotnull(o_custkey#X)) AND isnotnull(o_orderkey#X)) -(112) Project +(113) Project Output [2]: [o_orderkey#X, o_custkey#X] Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] -(113) Exchange +(114) Exchange Input [2]: [o_orderkey#X, o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(114) ShuffledHashJoin +(115) Sort +Input [2]: [o_orderkey#X, o_custkey#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(116) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join condition: None -(115) Project +(117) Project Output [2]: [c_nationkey#X, o_orderkey#X] Input [4]: [c_custkey#X, c_nationkey#X, o_orderkey#X, o_custkey#X] -(116) Exchange +(118) Exchange Input [2]: [c_nationkey#X, o_orderkey#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(117) Scan parquet +(119) Sort +Input [2]: [c_nationkey#X, o_orderkey#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(120) Scan parquet Output [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_orderkey), IsNotNull(l_suppkey)] ReadSchema: struct -(118) Filter +(121) Filter Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Condition : (isnotnull(l_orderkey#X) AND isnotnull(l_suppkey#X)) -(119) Exchange +(122) Exchange Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(120) ShuffledHashJoin +(123) Sort +Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(124) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(121) Project +(125) Project Output [4]: [c_nationkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Input [6]: [c_nationkey#X, o_orderkey#X, l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] -(122) Exchange +(126) Exchange Input [4]: [c_nationkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_suppkey#X, c_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(123) Scan parquet +(127) Sort +Input [4]: [c_nationkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_suppkey#X ASC NULLS FIRST, c_nationkey#X ASC NULLS FIRST], false, 0 + +(128) Scan parquet Output [2]: [s_suppkey#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct -(124) Filter +(129) Filter Input [2]: [s_suppkey#X, s_nationkey#X] Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) -(125) Exchange +(130) Exchange Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(126) ShuffledHashJoin +(131) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST, s_nationkey#X ASC NULLS FIRST], false, 0 + +(132) SortMergeJoin Left keys [2]: [l_suppkey#X, c_nationkey#X] Right keys [2]: [s_suppkey#X, s_nationkey#X] Join condition: None -(127) Project +(133) Project Output [3]: [l_extendedprice#X, l_discount#X, s_nationkey#X] Input [6]: [c_nationkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, s_suppkey#X, s_nationkey#X] -(128) Exchange +(134) Exchange Input [3]: [l_extendedprice#X, l_discount#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(129) Scan parquet +(135) Sort +Input [3]: [l_extendedprice#X, l_discount#X, s_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(136) Scan parquet Output [3]: [n_nationkey#X, n_name#X, n_regionkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey), IsNotNull(n_regionkey)] ReadSchema: struct -(130) Filter +(137) Filter Input [3]: [n_nationkey#X, n_name#X, n_regionkey#X] Condition : (isnotnull(n_nationkey#X) AND isnotnull(n_regionkey#X)) -(131) Exchange +(138) Exchange Input [3]: [n_nationkey#X, n_name#X, n_regionkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(132) ShuffledHashJoin +(139) Sort +Input [3]: [n_nationkey#X, n_name#X, n_regionkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(140) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(133) Project +(141) Project Output [4]: [l_extendedprice#X, l_discount#X, n_name#X, n_regionkey#X] Input [6]: [l_extendedprice#X, l_discount#X, s_nationkey#X, n_nationkey#X, n_name#X, n_regionkey#X] -(134) Exchange +(142) Exchange Input [4]: [l_extendedprice#X, l_discount#X, n_name#X, n_regionkey#X] Arguments: hashpartitioning(n_regionkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(135) Scan parquet +(143) Sort +Input [4]: [l_extendedprice#X, l_discount#X, n_name#X, n_regionkey#X] +Arguments: [n_regionkey#X ASC NULLS FIRST], false, 0 + +(144) Scan parquet Output [2]: [r_regionkey#X, r_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(r_name), EqualTo(r_name,ASIA), IsNotNull(r_regionkey)] ReadSchema: struct -(136) Filter +(145) Filter Input [2]: [r_regionkey#X, r_name#X] Condition : ((isnotnull(r_name#X) AND (r_name#X = ASIA)) AND isnotnull(r_regionkey#X)) -(137) Project +(146) Project Output [1]: [r_regionkey#X] Input [2]: [r_regionkey#X, r_name#X] -(138) Exchange +(147) Exchange Input [1]: [r_regionkey#X] Arguments: hashpartitioning(r_regionkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(139) ShuffledHashJoin +(148) Sort +Input [1]: [r_regionkey#X] +Arguments: [r_regionkey#X ASC NULLS FIRST], false, 0 + +(149) SortMergeJoin Left keys [1]: [n_regionkey#X] Right keys [1]: [r_regionkey#X] Join condition: None -(140) Project +(150) Project Output [3]: [l_extendedprice#X, l_discount#X, n_name#X] Input [5]: [l_extendedprice#X, l_discount#X, n_name#X, n_regionkey#X, r_regionkey#X] -(141) HashAggregate +(151) HashAggregate Input [3]: [l_extendedprice#X, l_discount#X, n_name#X] Keys [1]: [n_name#X] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [n_name#X, sum#X, isEmpty#X] -(142) Exchange +(152) Exchange Input [3]: [n_name#X, sum#X, isEmpty#X] Arguments: hashpartitioning(n_name#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(143) HashAggregate +(153) HashAggregate Input [3]: [n_name#X, sum#X, isEmpty#X] Keys [1]: [n_name#X] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X] Results [2]: [n_name#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X AS revenue#X] -(144) Exchange +(154) Exchange Input [2]: [n_name#X, revenue#X] Arguments: rangepartitioning(revenue#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(145) Sort +(155) Sort Input [2]: [n_name#X, revenue#X] Arguments: [revenue#X DESC NULLS LAST], true, 0 -(146) AdaptiveSparkPlan +(156) AdaptiveSparkPlan Output [2]: [n_name#X, revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/7.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/7.txt index a7054770a17e..d9eb23cb737e 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/7.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/7.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (139) +AdaptiveSparkPlan (149) +- == Final Plan == VeloxColumnarToRowExec (101) +- ^ SortExecTransformer (99) @@ -79,43 +79,53 @@ AdaptiveSparkPlan (139) +- ShuffleQueryStage (79) +- ReusedExchange (78) +- == Initial Plan == - Sort (138) - +- Exchange (137) - +- HashAggregate (136) - +- Exchange (135) - +- HashAggregate (134) - +- Project (133) - +- ShuffledHashJoin Inner BuildRight (132) - :- Exchange (128) - : +- Project (127) - : +- ShuffledHashJoin Inner BuildRight (126) - : :- Exchange (122) - : : +- Project (121) - : : +- ShuffledHashJoin Inner BuildRight (120) - : : :- Exchange (116) - : : : +- Project (115) - : : : +- ShuffledHashJoin Inner BuildRight (114) - : : : :- Exchange (110) - : : : : +- Project (109) - : : : : +- ShuffledHashJoin Inner BuildLeft (108) - : : : : :- Exchange (104) - : : : : : +- Filter (103) - : : : : : +- Scan parquet (102) - : : : : +- Exchange (107) - : : : : +- Filter (106) - : : : : +- Scan parquet (105) - : : : +- Exchange (113) - : : : +- Filter (112) - : : : +- Scan parquet (111) - : : +- Exchange (119) - : : +- Filter (118) - : : +- Scan parquet (117) - : +- Exchange (125) - : +- Filter (124) - : +- Scan parquet (123) - +- Exchange (131) - +- Filter (130) - +- Scan parquet (129) + Sort (148) + +- Exchange (147) + +- HashAggregate (146) + +- Exchange (145) + +- HashAggregate (144) + +- Project (143) + +- SortMergeJoin Inner (142) + :- Sort (137) + : +- Exchange (136) + : +- Project (135) + : +- SortMergeJoin Inner (134) + : :- Sort (129) + : : +- Exchange (128) + : : +- Project (127) + : : +- SortMergeJoin Inner (126) + : : :- Sort (121) + : : : +- Exchange (120) + : : : +- Project (119) + : : : +- SortMergeJoin Inner (118) + : : : :- Sort (113) + : : : : +- Exchange (112) + : : : : +- Project (111) + : : : : +- SortMergeJoin Inner (110) + : : : : :- Sort (105) + : : : : : +- Exchange (104) + : : : : : +- Filter (103) + : : : : : +- Scan parquet (102) + : : : : +- Sort (109) + : : : : +- Exchange (108) + : : : : +- Filter (107) + : : : : +- Scan parquet (106) + : : : +- Sort (117) + : : : +- Exchange (116) + : : : +- Filter (115) + : : : +- Scan parquet (114) + : : +- Sort (125) + : : +- Exchange (124) + : : +- Filter (123) + : : +- Scan parquet (122) + : +- Sort (133) + : +- Exchange (132) + : +- Filter (131) + : +- Scan parquet (130) + +- Sort (141) + +- Exchange (140) + +- Filter (139) + +- Scan parquet (138) (1) Scan parquet @@ -537,168 +547,208 @@ Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(105) Scan parquet +(105) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(106) Scan parquet Output [5]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1995-01-01), LessThanOrEqual(l_shipdate,1996-12-31), IsNotNull(l_suppkey), IsNotNull(l_orderkey)] ReadSchema: struct -(106) Filter +(107) Filter Input [5]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : ((((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1995-01-01)) AND (l_shipdate#X <= 1996-12-31)) AND isnotnull(l_suppkey#X)) AND isnotnull(l_orderkey#X)) -(107) Exchange +(108) Exchange Input [5]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(108) ShuffledHashJoin +(109) Sort +Input [5]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Arguments: [l_suppkey#X ASC NULLS FIRST], false, 0 + +(110) SortMergeJoin Left keys [1]: [s_suppkey#X] Right keys [1]: [l_suppkey#X] Join condition: None -(109) Project +(111) Project Output [5]: [s_nationkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Input [7]: [s_suppkey#X, s_nationkey#X, l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(110) Exchange +(112) Exchange Input [5]: [s_nationkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(111) Scan parquet +(113) Sort +Input [5]: [s_nationkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(114) Scan parquet Output [2]: [o_orderkey#X, o_custkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderkey), IsNotNull(o_custkey)] ReadSchema: struct -(112) Filter +(115) Filter Input [2]: [o_orderkey#X, o_custkey#X] Condition : (isnotnull(o_orderkey#X) AND isnotnull(o_custkey#X)) -(113) Exchange +(116) Exchange Input [2]: [o_orderkey#X, o_custkey#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(114) ShuffledHashJoin +(117) Sort +Input [2]: [o_orderkey#X, o_custkey#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(118) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [o_orderkey#X] Join condition: None -(115) Project +(119) Project Output [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_custkey#X] Input [7]: [s_nationkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_orderkey#X, o_custkey#X] -(116) Exchange +(120) Exchange Input [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(117) Scan parquet +(121) Sort +Input [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_custkey#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(122) Scan parquet Output [2]: [c_custkey#X, c_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(c_custkey), IsNotNull(c_nationkey)] ReadSchema: struct -(118) Filter +(123) Filter Input [2]: [c_custkey#X, c_nationkey#X] Condition : (isnotnull(c_custkey#X) AND isnotnull(c_nationkey#X)) -(119) Exchange +(124) Exchange Input [2]: [c_custkey#X, c_nationkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(120) ShuffledHashJoin +(125) Sort +Input [2]: [c_custkey#X, c_nationkey#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(126) SortMergeJoin Left keys [1]: [o_custkey#X] Right keys [1]: [c_custkey#X] Join condition: None -(121) Project +(127) Project Output [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X] Input [7]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_custkey#X, c_custkey#X, c_nationkey#X] -(122) Exchange +(128) Exchange Input [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(123) Scan parquet +(129) Sort +Input [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(130) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey), Or(EqualTo(n_name,FRANCE),EqualTo(n_name,GERMANY))] ReadSchema: struct -(124) Filter +(131) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : (isnotnull(n_nationkey#X) AND ((n_name#X = FRANCE) OR (n_name#X = GERMANY))) -(125) Exchange +(132) Exchange Input [2]: [n_nationkey#X, n_name#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(126) ShuffledHashJoin +(133) Sort +Input [2]: [n_nationkey#X, n_name#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(134) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(127) Project +(135) Project Output [5]: [l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_name#X] Input [7]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_nationkey#X, n_name#X] -(128) Exchange +(136) Exchange Input [5]: [l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_name#X] Arguments: hashpartitioning(c_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(129) Scan parquet +(137) Sort +Input [5]: [l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_name#X] +Arguments: [c_nationkey#X ASC NULLS FIRST], false, 0 + +(138) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey), Or(EqualTo(n_name,GERMANY),EqualTo(n_name,FRANCE))] ReadSchema: struct -(130) Filter +(139) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : (isnotnull(n_nationkey#X) AND ((n_name#X = GERMANY) OR (n_name#X = FRANCE))) -(131) Exchange +(140) Exchange Input [2]: [n_nationkey#X, n_name#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(132) ShuffledHashJoin +(141) Sort +Input [2]: [n_nationkey#X, n_name#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(142) SortMergeJoin Left keys [1]: [c_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: (((n_name#X = FRANCE) AND (n_name#X = GERMANY)) OR ((n_name#X = GERMANY) AND (n_name#X = FRANCE))) -(133) Project +(143) Project Output [4]: [n_name#X AS supp_nation#X, n_name#X AS cust_nation#X, year(l_shipdate#X) AS l_year#X, CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true) AS volume#X] Input [7]: [l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_name#X, n_nationkey#X, n_name#X] -(134) HashAggregate +(144) HashAggregate Input [4]: [supp_nation#X, cust_nation#X, l_year#X, volume#X] Keys [3]: [supp_nation#X, cust_nation#X, l_year#X] Functions [1]: [partial_sum(volume#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [5]: [supp_nation#X, cust_nation#X, l_year#X, sum#X, isEmpty#X] -(135) Exchange +(145) Exchange Input [5]: [supp_nation#X, cust_nation#X, l_year#X, sum#X, isEmpty#X] Arguments: hashpartitioning(supp_nation#X, cust_nation#X, l_year#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(136) HashAggregate +(146) HashAggregate Input [5]: [supp_nation#X, cust_nation#X, l_year#X, sum#X, isEmpty#X] Keys [3]: [supp_nation#X, cust_nation#X, l_year#X] Functions [1]: [sum(volume#X)] Aggregate Attributes [1]: [sum(volume#X)#X] Results [4]: [supp_nation#X, cust_nation#X, l_year#X, sum(volume#X)#X AS revenue#X] -(137) Exchange +(147) Exchange Input [4]: [supp_nation#X, cust_nation#X, l_year#X, revenue#X] Arguments: rangepartitioning(supp_nation#X ASC NULLS FIRST, cust_nation#X ASC NULLS FIRST, l_year#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(138) Sort +(148) Sort Input [4]: [supp_nation#X, cust_nation#X, l_year#X, revenue#X] Arguments: [supp_nation#X ASC NULLS FIRST, cust_nation#X ASC NULLS FIRST, l_year#X ASC NULLS FIRST], true, 0 -(139) AdaptiveSparkPlan +(149) AdaptiveSparkPlan Output [4]: [supp_nation#X, cust_nation#X, l_year#X, revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/8.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/8.txt index cdede8445908..5c9e51b95c60 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/8.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/8.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (193) +AdaptiveSparkPlan (207) +- == Final Plan == VeloxColumnarToRowExec (141) +- ^ SortExecTransformer (139) @@ -110,57 +110,71 @@ AdaptiveSparkPlan (193) +- ^ NoopFilter (113) +- ^ Scan parquet (112) +- == Initial Plan == - Sort (192) - +- Exchange (191) - +- HashAggregate (190) - +- Exchange (189) - +- HashAggregate (188) - +- Project (187) - +- ShuffledHashJoin Inner BuildRight (186) - :- Exchange (181) - : +- Project (180) - : +- ShuffledHashJoin Inner BuildRight (179) - : :- Exchange (175) - : : +- Project (174) - : : +- ShuffledHashJoin Inner BuildRight (173) - : : :- Exchange (169) - : : : +- Project (168) - : : : +- ShuffledHashJoin Inner BuildRight (167) - : : : :- Exchange (163) - : : : : +- Project (162) - : : : : +- ShuffledHashJoin Inner BuildRight (161) - : : : : :- Exchange (157) - : : : : : +- Project (156) - : : : : : +- ShuffledHashJoin Inner BuildRight (155) - : : : : : :- Exchange (151) - : : : : : : +- Project (150) - : : : : : : +- ShuffledHashJoin Inner BuildLeft (149) - : : : : : : :- Exchange (145) - : : : : : : : +- Project (144) - : : : : : : : +- Filter (143) - : : : : : : : +- Scan parquet (142) - : : : : : : +- Exchange (148) - : : : : : : +- Filter (147) - : : : : : : +- Scan parquet (146) - : : : : : +- Exchange (154) - : : : : : +- Filter (153) - : : : : : +- Scan parquet (152) - : : : : +- Exchange (160) - : : : : +- Filter (159) - : : : : +- Scan parquet (158) - : : : +- Exchange (166) - : : : +- Filter (165) - : : : +- Scan parquet (164) - : : +- Exchange (172) - : : +- Filter (171) - : : +- Scan parquet (170) - : +- Exchange (178) - : +- Filter (177) - : +- Scan parquet (176) - +- Exchange (185) - +- Project (184) - +- Filter (183) - +- Scan parquet (182) + Sort (206) + +- Exchange (205) + +- HashAggregate (204) + +- Exchange (203) + +- HashAggregate (202) + +- Project (201) + +- SortMergeJoin Inner (200) + :- Sort (194) + : +- Exchange (193) + : +- Project (192) + : +- SortMergeJoin Inner (191) + : :- Sort (186) + : : +- Exchange (185) + : : +- Project (184) + : : +- SortMergeJoin Inner (183) + : : :- Sort (178) + : : : +- Exchange (177) + : : : +- Project (176) + : : : +- SortMergeJoin Inner (175) + : : : :- Sort (170) + : : : : +- Exchange (169) + : : : : +- Project (168) + : : : : +- SortMergeJoin Inner (167) + : : : : :- Sort (162) + : : : : : +- Exchange (161) + : : : : : +- Project (160) + : : : : : +- SortMergeJoin Inner (159) + : : : : : :- Sort (154) + : : : : : : +- Exchange (153) + : : : : : : +- Project (152) + : : : : : : +- SortMergeJoin Inner (151) + : : : : : : :- Sort (146) + : : : : : : : +- Exchange (145) + : : : : : : : +- Project (144) + : : : : : : : +- Filter (143) + : : : : : : : +- Scan parquet (142) + : : : : : : +- Sort (150) + : : : : : : +- Exchange (149) + : : : : : : +- Filter (148) + : : : : : : +- Scan parquet (147) + : : : : : +- Sort (158) + : : : : : +- Exchange (157) + : : : : : +- Filter (156) + : : : : : +- Scan parquet (155) + : : : : +- Sort (166) + : : : : +- Exchange (165) + : : : : +- Filter (164) + : : : : +- Scan parquet (163) + : : : +- Sort (174) + : : : +- Exchange (173) + : : : +- Filter (172) + : : : +- Scan parquet (171) + : : +- Sort (182) + : : +- Exchange (181) + : : +- Filter (180) + : : +- Scan parquet (179) + : +- Sort (190) + : +- Exchange (189) + : +- Filter (188) + : +- Scan parquet (187) + +- Sort (199) + +- Exchange (198) + +- Project (197) + +- Filter (196) + +- Scan parquet (195) (1) Scan parquet @@ -750,228 +764,284 @@ Input [2]: [p_partkey#X, p_type#X] Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(146) Scan parquet +(146) Sort +Input [1]: [p_partkey#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(147) Scan parquet Output [5]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_partkey), IsNotNull(l_suppkey), IsNotNull(l_orderkey)] ReadSchema: struct -(147) Filter +(148) Filter Input [5]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Condition : ((isnotnull(l_partkey#X) AND isnotnull(l_suppkey#X)) AND isnotnull(l_orderkey#X)) -(148) Exchange +(149) Exchange Input [5]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(149) ShuffledHashJoin +(150) Sort +Input [5]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(151) SortMergeJoin Left keys [1]: [p_partkey#X] Right keys [1]: [l_partkey#X] Join condition: None -(150) Project +(152) Project Output [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Input [6]: [p_partkey#X, l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] -(151) Exchange +(153) Exchange Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(152) Scan parquet +(154) Sort +Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_suppkey#X ASC NULLS FIRST], false, 0 + +(155) Scan parquet Output [2]: [s_suppkey#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct -(153) Filter +(156) Filter Input [2]: [s_suppkey#X, s_nationkey#X] Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) -(154) Exchange +(157) Exchange Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(155) ShuffledHashJoin +(158) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(159) SortMergeJoin Left keys [1]: [l_suppkey#X] Right keys [1]: [s_suppkey#X] Join condition: None -(156) Project +(160) Project Output [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, s_nationkey#X] Input [6]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, s_suppkey#X, s_nationkey#X] -(157) Exchange +(161) Exchange Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, s_nationkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(158) Scan parquet +(162) Sort +Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, s_nationkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(163) Scan parquet Output [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderdate), GreaterThanOrEqual(o_orderdate,1995-01-01), LessThanOrEqual(o_orderdate,1996-12-31), IsNotNull(o_orderkey), IsNotNull(o_custkey)] ReadSchema: struct -(159) Filter +(164) Filter Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Condition : ((((isnotnull(o_orderdate#X) AND (o_orderdate#X >= 1995-01-01)) AND (o_orderdate#X <= 1996-12-31)) AND isnotnull(o_orderkey#X)) AND isnotnull(o_custkey#X)) -(160) Exchange +(165) Exchange Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(161) ShuffledHashJoin +(166) Sort +Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(167) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [o_orderkey#X] Join condition: None -(162) Project +(168) Project Output [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_custkey#X, o_orderdate#X] Input [7]: [l_orderkey#X, l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderkey#X, o_custkey#X, o_orderdate#X] -(163) Exchange +(169) Exchange Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_custkey#X, o_orderdate#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(164) Scan parquet +(170) Sort +Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_custkey#X, o_orderdate#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(171) Scan parquet Output [2]: [c_custkey#X, c_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(c_custkey), IsNotNull(c_nationkey)] ReadSchema: struct -(165) Filter +(172) Filter Input [2]: [c_custkey#X, c_nationkey#X] Condition : (isnotnull(c_custkey#X) AND isnotnull(c_nationkey#X)) -(166) Exchange +(173) Exchange Input [2]: [c_custkey#X, c_nationkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(167) ShuffledHashJoin +(174) Sort +Input [2]: [c_custkey#X, c_nationkey#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(175) SortMergeJoin Left keys [1]: [o_custkey#X] Right keys [1]: [c_custkey#X] Join condition: None -(168) Project +(176) Project Output [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, c_nationkey#X] Input [7]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_custkey#X, o_orderdate#X, c_custkey#X, c_nationkey#X] -(169) Exchange +(177) Exchange Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, c_nationkey#X] Arguments: hashpartitioning(c_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(170) Scan parquet +(178) Sort +Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, c_nationkey#X] +Arguments: [c_nationkey#X ASC NULLS FIRST], false, 0 + +(179) Scan parquet Output [2]: [n_nationkey#X, n_regionkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey), IsNotNull(n_regionkey)] ReadSchema: struct -(171) Filter +(180) Filter Input [2]: [n_nationkey#X, n_regionkey#X] Condition : (isnotnull(n_nationkey#X) AND isnotnull(n_regionkey#X)) -(172) Exchange +(181) Exchange Input [2]: [n_nationkey#X, n_regionkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(173) ShuffledHashJoin +(182) Sort +Input [2]: [n_nationkey#X, n_regionkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(183) SortMergeJoin Left keys [1]: [c_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(174) Project +(184) Project Output [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, n_regionkey#X] Input [7]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, c_nationkey#X, n_nationkey#X, n_regionkey#X] -(175) Exchange +(185) Exchange Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, n_regionkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(176) Scan parquet +(186) Sort +Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, n_regionkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(187) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey)] ReadSchema: struct -(177) Filter +(188) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : isnotnull(n_nationkey#X) -(178) Exchange +(189) Exchange Input [2]: [n_nationkey#X, n_name#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(179) ShuffledHashJoin +(190) Sort +Input [2]: [n_nationkey#X, n_name#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(191) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(180) Project +(192) Project Output [5]: [l_extendedprice#X, l_discount#X, o_orderdate#X, n_regionkey#X, n_name#X] Input [7]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, n_regionkey#X, n_nationkey#X, n_name#X] -(181) Exchange +(193) Exchange Input [5]: [l_extendedprice#X, l_discount#X, o_orderdate#X, n_regionkey#X, n_name#X] Arguments: hashpartitioning(n_regionkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(182) Scan parquet +(194) Sort +Input [5]: [l_extendedprice#X, l_discount#X, o_orderdate#X, n_regionkey#X, n_name#X] +Arguments: [n_regionkey#X ASC NULLS FIRST], false, 0 + +(195) Scan parquet Output [2]: [r_regionkey#X, r_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(r_name), EqualTo(r_name,AMERICA), IsNotNull(r_regionkey)] ReadSchema: struct -(183) Filter +(196) Filter Input [2]: [r_regionkey#X, r_name#X] Condition : ((isnotnull(r_name#X) AND (r_name#X = AMERICA)) AND isnotnull(r_regionkey#X)) -(184) Project +(197) Project Output [1]: [r_regionkey#X] Input [2]: [r_regionkey#X, r_name#X] -(185) Exchange +(198) Exchange Input [1]: [r_regionkey#X] Arguments: hashpartitioning(r_regionkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(186) ShuffledHashJoin +(199) Sort +Input [1]: [r_regionkey#X] +Arguments: [r_regionkey#X ASC NULLS FIRST], false, 0 + +(200) SortMergeJoin Left keys [1]: [n_regionkey#X] Right keys [1]: [r_regionkey#X] Join condition: None -(187) Project +(201) Project Output [3]: [year(o_orderdate#X) AS o_year#X, CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true) AS volume#X, n_name#X AS nation#X] Input [6]: [l_extendedprice#X, l_discount#X, o_orderdate#X, n_regionkey#X, n_name#X, r_regionkey#X] -(188) HashAggregate +(202) HashAggregate Input [3]: [o_year#X, volume#X, nation#X] Keys [1]: [o_year#X] Functions [2]: [partial_sum(CASE WHEN (nation#X = BRAZIL) THEN volume#X ELSE 0.0000 END), partial_sum(volume#X)] Aggregate Attributes [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] Results [5]: [o_year#X, sum#X, isEmpty#X, sum#X, isEmpty#X] -(189) Exchange +(203) Exchange Input [5]: [o_year#X, sum#X, isEmpty#X, sum#X, isEmpty#X] Arguments: hashpartitioning(o_year#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(190) HashAggregate +(204) HashAggregate Input [5]: [o_year#X, sum#X, isEmpty#X, sum#X, isEmpty#X] Keys [1]: [o_year#X] Functions [2]: [sum(CASE WHEN (nation#X = BRAZIL) THEN volume#X ELSE 0.0000 END), sum(volume#X)] Aggregate Attributes [2]: [sum(CASE WHEN (nation#X = BRAZIL) THEN volume#X ELSE 0.0000 END)#X, sum(volume#X)#X] Results [2]: [o_year#X, CheckOverflow((promote_precision(sum(CASE WHEN (nation#X = BRAZIL) THEN volume#X ELSE 0.0000 END)#X) / promote_precision(sum(volume#X)#X)), DecimalType(38,6), true) AS mkt_share#X] -(191) Exchange +(205) Exchange Input [2]: [o_year#X, mkt_share#X] Arguments: rangepartitioning(o_year#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(192) Sort +(206) Sort Input [2]: [o_year#X, mkt_share#X] Arguments: [o_year#X ASC NULLS FIRST], true, 0 -(193) AdaptiveSparkPlan +(207) AdaptiveSparkPlan Output [2]: [o_year#X, mkt_share#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/9.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/9.txt index 11a02d0a54d2..2abb6ec215c6 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/9.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark32/9.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (145) +AdaptiveSparkPlan (155) +- == Final Plan == VeloxColumnarToRowExec (106) +- ^ SortExecTransformer (104) @@ -83,44 +83,54 @@ AdaptiveSparkPlan (145) +- ^ NoopFilter (79) +- ^ Scan parquet (78) +- == Initial Plan == - Sort (144) - +- Exchange (143) - +- HashAggregate (142) - +- Exchange (141) - +- HashAggregate (140) - +- Project (139) - +- ShuffledHashJoin Inner BuildRight (138) - :- Exchange (134) - : +- Project (133) - : +- ShuffledHashJoin Inner BuildRight (132) - : :- Exchange (128) - : : +- Project (127) - : : +- ShuffledHashJoin Inner BuildRight (126) - : : :- Exchange (122) - : : : +- Project (121) - : : : +- ShuffledHashJoin Inner BuildRight (120) - : : : :- Exchange (116) - : : : : +- Project (115) - : : : : +- ShuffledHashJoin Inner BuildLeft (114) - : : : : :- Exchange (110) - : : : : : +- Project (109) - : : : : : +- Filter (108) - : : : : : +- Scan parquet (107) - : : : : +- Exchange (113) - : : : : +- Filter (112) - : : : : +- Scan parquet (111) - : : : +- Exchange (119) - : : : +- Filter (118) - : : : +- Scan parquet (117) - : : +- Exchange (125) - : : +- Filter (124) - : : +- Scan parquet (123) - : +- Exchange (131) - : +- Filter (130) - : +- Scan parquet (129) - +- Exchange (137) - +- Filter (136) - +- Scan parquet (135) + Sort (154) + +- Exchange (153) + +- HashAggregate (152) + +- Exchange (151) + +- HashAggregate (150) + +- Project (149) + +- SortMergeJoin Inner (148) + :- Sort (143) + : +- Exchange (142) + : +- Project (141) + : +- SortMergeJoin Inner (140) + : :- Sort (135) + : : +- Exchange (134) + : : +- Project (133) + : : +- SortMergeJoin Inner (132) + : : :- Sort (127) + : : : +- Exchange (126) + : : : +- Project (125) + : : : +- SortMergeJoin Inner (124) + : : : :- Sort (119) + : : : : +- Exchange (118) + : : : : +- Project (117) + : : : : +- SortMergeJoin Inner (116) + : : : : :- Sort (111) + : : : : : +- Exchange (110) + : : : : : +- Project (109) + : : : : : +- Filter (108) + : : : : : +- Scan parquet (107) + : : : : +- Sort (115) + : : : : +- Exchange (114) + : : : : +- Filter (113) + : : : : +- Scan parquet (112) + : : : +- Sort (123) + : : : +- Exchange (122) + : : : +- Filter (121) + : : : +- Scan parquet (120) + : : +- Sort (131) + : : +- Exchange (130) + : : +- Filter (129) + : : +- Scan parquet (128) + : +- Sort (139) + : +- Exchange (138) + : +- Filter (137) + : +- Scan parquet (136) + +- Sort (147) + +- Exchange (146) + +- Filter (145) + +- Scan parquet (144) (1) Scan parquet @@ -570,168 +580,208 @@ Input [2]: [p_partkey#X, p_name#X] Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(111) Scan parquet +(111) Sort +Input [1]: [p_partkey#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(112) Scan parquet Output [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_partkey), IsNotNull(l_suppkey), IsNotNull(l_orderkey)] ReadSchema: struct -(112) Filter +(113) Filter Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Condition : ((isnotnull(l_partkey#X) AND isnotnull(l_suppkey#X)) AND isnotnull(l_orderkey#X)) -(113) Exchange +(114) Exchange Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(114) ShuffledHashJoin +(115) Sort +Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(116) SortMergeJoin Left keys [1]: [p_partkey#X] Right keys [1]: [l_partkey#X] Join condition: None -(115) Project +(117) Project Output [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Input [7]: [p_partkey#X, l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] -(116) Exchange +(118) Exchange Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(117) Scan parquet +(119) Sort +Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] +Arguments: [l_suppkey#X ASC NULLS FIRST], false, 0 + +(120) Scan parquet Output [2]: [s_suppkey#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct -(118) Filter +(121) Filter Input [2]: [s_suppkey#X, s_nationkey#X] Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) -(119) Exchange +(122) Exchange Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(120) ShuffledHashJoin +(123) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(124) SortMergeJoin Left keys [1]: [l_suppkey#X] Right keys [1]: [s_suppkey#X] Join condition: None -(121) Project +(125) Project Output [7]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X] Input [8]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_suppkey#X, s_nationkey#X] -(122) Exchange +(126) Exchange Input [7]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X] Arguments: hashpartitioning(l_suppkey#X, l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(123) Scan parquet +(127) Sort +Input [7]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X] +Arguments: [l_suppkey#X ASC NULLS FIRST, l_partkey#X ASC NULLS FIRST], false, 0 + +(128) Scan parquet Output [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(ps_suppkey), IsNotNull(ps_partkey)] ReadSchema: struct -(124) Filter +(129) Filter Input [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] Condition : (isnotnull(ps_suppkey#X) AND isnotnull(ps_partkey#X)) -(125) Exchange +(130) Exchange Input [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] Arguments: hashpartitioning(ps_suppkey#X, ps_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(126) ShuffledHashJoin +(131) Sort +Input [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] +Arguments: [ps_suppkey#X ASC NULLS FIRST, ps_partkey#X ASC NULLS FIRST], false, 0 + +(132) SortMergeJoin Left keys [2]: [l_suppkey#X, l_partkey#X] Right keys [2]: [ps_suppkey#X, ps_partkey#X] Join condition: None -(127) Project +(133) Project Output [6]: [l_orderkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X] Input [10]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_partkey#X, ps_suppkey#X, ps_supplycost#X] -(128) Exchange +(134) Exchange Input [6]: [l_orderkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(129) Scan parquet +(135) Sort +Input [6]: [l_orderkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(136) Scan parquet Output [2]: [o_orderkey#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderkey)] ReadSchema: struct -(130) Filter +(137) Filter Input [2]: [o_orderkey#X, o_orderdate#X] Condition : isnotnull(o_orderkey#X) -(131) Exchange +(138) Exchange Input [2]: [o_orderkey#X, o_orderdate#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(132) ShuffledHashJoin +(139) Sort +Input [2]: [o_orderkey#X, o_orderdate#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(140) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [o_orderkey#X] Join condition: None -(133) Project +(141) Project Output [6]: [l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderdate#X] Input [8]: [l_orderkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderkey#X, o_orderdate#X] -(134) Exchange +(142) Exchange Input [6]: [l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderdate#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(135) Scan parquet +(143) Sort +Input [6]: [l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderdate#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(144) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey)] ReadSchema: struct -(136) Filter +(145) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : isnotnull(n_nationkey#X) -(137) Exchange +(146) Exchange Input [2]: [n_nationkey#X, n_name#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(138) ShuffledHashJoin +(147) Sort +Input [2]: [n_nationkey#X, n_name#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(148) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(139) Project +(149) Project Output [3]: [n_name#X AS nation#X, year(o_orderdate#X) AS o_year#X, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true) as decimal(27,4))) - promote_precision(cast(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(l_quantity#X)), DecimalType(25,4), true) as decimal(27,4)))), DecimalType(27,4), true) AS amount#X] Input [8]: [l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderdate#X, n_nationkey#X, n_name#X] -(140) HashAggregate +(150) HashAggregate Input [3]: [nation#X, o_year#X, amount#X] Keys [2]: [nation#X, o_year#X] Functions [1]: [partial_sum(amount#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [4]: [nation#X, o_year#X, sum#X, isEmpty#X] -(141) Exchange +(151) Exchange Input [4]: [nation#X, o_year#X, sum#X, isEmpty#X] Arguments: hashpartitioning(nation#X, o_year#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(142) HashAggregate +(152) HashAggregate Input [4]: [nation#X, o_year#X, sum#X, isEmpty#X] Keys [2]: [nation#X, o_year#X] Functions [1]: [sum(amount#X)] Aggregate Attributes [1]: [sum(amount#X)#X] Results [3]: [nation#X, o_year#X, sum(amount#X)#X AS sum_profit#X] -(143) Exchange +(153) Exchange Input [3]: [nation#X, o_year#X, sum_profit#X] Arguments: rangepartitioning(nation#X ASC NULLS FIRST, o_year#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(144) Sort +(154) Sort Input [3]: [nation#X, o_year#X, sum_profit#X] Arguments: [nation#X ASC NULLS FIRST, o_year#X DESC NULLS LAST], true, 0 -(145) AdaptiveSparkPlan +(155) AdaptiveSparkPlan Output [3]: [nation#X, o_year#X, sum_profit#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/10.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/10.txt index 5a74265ab590..3be5f1996fa8 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/10.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/10.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (94) +AdaptiveSparkPlan (100) +- == Final Plan == VeloxColumnarToRowExec (67) +- TakeOrderedAndProjectExecTransformer (66) @@ -54,32 +54,38 @@ AdaptiveSparkPlan (94) +- ^ NoopFilter (45) +- ^ Scan parquet (44) +- == Initial Plan == - TakeOrderedAndProject (93) - +- HashAggregate (92) - +- Exchange (91) - +- HashAggregate (90) - +- Project (89) - +- ShuffledHashJoin Inner BuildRight (88) - :- Exchange (84) - : +- Project (83) - : +- ShuffledHashJoin Inner BuildRight (82) - : :- Exchange (77) - : : +- Project (76) - : : +- ShuffledHashJoin Inner BuildRight (75) - : : :- Exchange (70) - : : : +- Filter (69) - : : : +- Scan parquet (68) - : : +- Exchange (74) - : : +- Project (73) - : : +- Filter (72) - : : +- Scan parquet (71) - : +- Exchange (81) - : +- Project (80) - : +- Filter (79) - : +- Scan parquet (78) - +- Exchange (87) - +- Filter (86) - +- Scan parquet (85) + TakeOrderedAndProject (99) + +- HashAggregate (98) + +- Exchange (97) + +- HashAggregate (96) + +- Project (95) + +- SortMergeJoin Inner (94) + :- Sort (89) + : +- Exchange (88) + : +- Project (87) + : +- SortMergeJoin Inner (86) + : :- Sort (80) + : : +- Exchange (79) + : : +- Project (78) + : : +- SortMergeJoin Inner (77) + : : :- Sort (71) + : : : +- Exchange (70) + : : : +- Filter (69) + : : : +- Scan parquet (68) + : : +- Sort (76) + : : +- Exchange (75) + : : +- Project (74) + : : +- Filter (73) + : : +- Scan parquet (72) + : +- Sort (85) + : +- Exchange (84) + : +- Project (83) + : +- Filter (82) + : +- Scan parquet (81) + +- Sort (93) + +- Exchange (92) + +- Filter (91) + +- Scan parquet (90) (1) Scan parquet @@ -371,116 +377,140 @@ Condition : (isnotnull(c_custkey#X) AND isnotnull(c_nationkey#X)) Input [7]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(71) Scan parquet +(71) Sort +Input [7]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(72) Scan parquet Output [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderdate), GreaterThanOrEqual(o_orderdate,1993-10-01), LessThan(o_orderdate,1994-01-01), IsNotNull(o_custkey), IsNotNull(o_orderkey)] ReadSchema: struct -(72) Filter +(73) Filter Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Condition : ((((isnotnull(o_orderdate#X) AND (o_orderdate#X >= 1993-10-01)) AND (o_orderdate#X < 1994-01-01)) AND isnotnull(o_custkey#X)) AND isnotnull(o_orderkey#X)) -(73) Project +(74) Project Output [2]: [o_orderkey#X, o_custkey#X] Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] -(74) Exchange +(75) Exchange Input [2]: [o_orderkey#X, o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(75) ShuffledHashJoin +(76) Sort +Input [2]: [o_orderkey#X, o_custkey#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(77) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join condition: None -(76) Project +(78) Project Output [8]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X] Input [9]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X, o_custkey#X] -(77) Exchange +(79) Exchange Input [8]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(78) Scan parquet +(80) Sort +Input [8]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(81) Scan parquet Output [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_returnflag#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_returnflag), EqualTo(l_returnflag,R), IsNotNull(l_orderkey)] ReadSchema: struct -(79) Filter +(82) Filter Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_returnflag#X] Condition : ((isnotnull(l_returnflag#X) AND (l_returnflag#X = R)) AND isnotnull(l_orderkey#X)) -(80) Project +(83) Project Output [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_returnflag#X] -(81) Exchange +(84) Exchange Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(82) ShuffledHashJoin +(85) Sort +Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(86) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(83) Project +(87) Project Output [9]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X] Input [11]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X] -(84) Exchange +(88) Exchange Input [9]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(c_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(85) Scan parquet +(89) Sort +Input [9]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X] +Arguments: [c_nationkey#X ASC NULLS FIRST], false, 0 + +(90) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey)] ReadSchema: struct -(86) Filter +(91) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : isnotnull(n_nationkey#X) -(87) Exchange +(92) Exchange Input [2]: [n_nationkey#X, n_name#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(88) ShuffledHashJoin +(93) Sort +Input [2]: [n_nationkey#X, n_name#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(94) SortMergeJoin Left keys [1]: [c_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(89) Project +(95) Project Output [9]: [c_custkey#X, c_name#X, c_address#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X, n_name#X] Input [11]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X, n_nationkey#X, n_name#X] -(90) HashAggregate +(96) HashAggregate Input [9]: [c_custkey#X, c_name#X, c_address#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X, n_name#X] Keys [7]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [9]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, sum#X, isEmpty#X] -(91) Exchange +(97) Exchange Input [9]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, sum#X, isEmpty#X] Arguments: hashpartitioning(c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(92) HashAggregate +(98) HashAggregate Input [9]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, sum#X, isEmpty#X] Keys [7]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] Results [8]: [c_custkey#X, c_name#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X AS revenue#X, c_acctbal#X, n_name#X, c_address#X, c_phone#X, c_comment#X] -(93) TakeOrderedAndProject +(99) TakeOrderedAndProject Input [8]: [c_custkey#X, c_name#X, revenue#X, c_acctbal#X, n_name#X, c_address#X, c_phone#X, c_comment#X] Arguments: X, [revenue#X DESC NULLS LAST], [c_custkey#X, c_name#X, revenue#X, c_acctbal#X, n_name#X, c_address#X, c_phone#X, c_comment#X] -(94) AdaptiveSparkPlan +(100) AdaptiveSparkPlan Output [8]: [c_custkey#X, c_name#X, revenue#X, c_acctbal#X, n_name#X, c_address#X, c_phone#X, c_comment#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/11.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/11.txt index 8d17beb8c0a9..2347eb9b677e 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/11.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/11.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (78) +AdaptiveSparkPlan (82) +- == Final Plan == VeloxColumnarToRowExec (56) +- ^ SortExecTransformer (54) @@ -45,27 +45,31 @@ AdaptiveSparkPlan (78) +- ^ NoopFilter (28) +- ^ Scan parquet (27) +- == Initial Plan == - Sort (77) - +- Exchange (76) - +- Filter (75) - +- HashAggregate (74) - +- Exchange (73) - +- HashAggregate (72) - +- Project (71) - +- ShuffledHashJoin Inner BuildRight (70) - :- Exchange (65) - : +- Project (64) - : +- ShuffledHashJoin Inner BuildRight (63) - : :- Exchange (59) - : : +- Filter (58) - : : +- Scan parquet (57) - : +- Exchange (62) - : +- Filter (61) - : +- Scan parquet (60) - +- Exchange (69) - +- Project (68) - +- Filter (67) - +- Scan parquet (66) + Sort (81) + +- Exchange (80) + +- Filter (79) + +- HashAggregate (78) + +- Exchange (77) + +- HashAggregate (76) + +- Project (75) + +- SortMergeJoin Inner (74) + :- Sort (68) + : +- Exchange (67) + : +- Project (66) + : +- SortMergeJoin Inner (65) + : :- Sort (60) + : : +- Exchange (59) + : : +- Filter (58) + : : +- Scan parquet (57) + : +- Sort (64) + : +- Exchange (63) + : +- Filter (62) + : +- Scan parquet (61) + +- Sort (73) + +- Exchange (72) + +- Project (71) + +- Filter (70) + +- Scan parquet (69) (1) Scan parquet @@ -311,359 +315,395 @@ Condition : isnotnull(ps_suppkey#X) Input [4]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(60) Scan parquet +(60) Sort +Input [4]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] +Arguments: [ps_suppkey#X ASC NULLS FIRST], false, 0 + +(61) Scan parquet Output [2]: [s_suppkey#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct -(61) Filter +(62) Filter Input [2]: [s_suppkey#X, s_nationkey#X] Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) -(62) Exchange +(63) Exchange Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(63) ShuffledHashJoin +(64) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(65) SortMergeJoin Left keys [1]: [ps_suppkey#X] Right keys [1]: [s_suppkey#X] Join condition: None -(64) Project +(66) Project Output [4]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] Input [6]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X, s_suppkey#X, s_nationkey#X] -(65) Exchange +(67) Exchange Input [4]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(66) Scan parquet +(68) Sort +Input [4]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(69) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,GERMANY), IsNotNull(n_nationkey)] ReadSchema: struct -(67) Filter +(70) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = GERMANY)) AND isnotnull(n_nationkey#X)) -(68) Project +(71) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(69) Exchange +(72) Exchange Input [1]: [n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(70) ShuffledHashJoin +(73) Sort +Input [1]: [n_nationkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(74) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(71) Project +(75) Project Output [3]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X] Input [5]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X, n_nationkey#X] -(72) HashAggregate +(76) HashAggregate Input [3]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X] Keys [1]: [ps_partkey#X] Functions [1]: [partial_sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [ps_partkey#X, sum#X, isEmpty#X] -(73) Exchange +(77) Exchange Input [3]: [ps_partkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(ps_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(74) HashAggregate +(78) HashAggregate Input [3]: [ps_partkey#X, sum#X, isEmpty#X] Keys [1]: [ps_partkey#X] Functions [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))#X] Results [2]: [ps_partkey#X, sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))#X AS value#X] -(75) Filter +(79) Filter Input [2]: [ps_partkey#X, value#X] Condition : (isnotnull(value#X) AND (cast(value#X as decimal(38,6)) > Subquery subquery#X, [id=#X])) -(76) Exchange +(80) Exchange Input [2]: [ps_partkey#X, value#X] Arguments: rangepartitioning(value#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(77) Sort +(81) Sort Input [2]: [ps_partkey#X, value#X] Arguments: [value#X DESC NULLS LAST], true, 0 -(78) AdaptiveSparkPlan +(82) AdaptiveSparkPlan Output [2]: [ps_partkey#X, value#X] Arguments: isFinalPlan=true ===== Subqueries ===== Subquery:1 Hosting operator id = 47 Hosting Expression = Subquery subquery#X, [id=#X] -AdaptiveSparkPlan (128) +AdaptiveSparkPlan (136) +- == Final Plan == - VeloxColumnarToRowExec (110) - +- ^ ProjectExecTransformer (108) - +- ^ RegularHashAggregateExecTransformer (107) - +- ^ RegularHashAggregateExecTransformer (106) - +- ^ ProjectExecTransformer (105) - +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (104) - :- ^ InputIteratorTransformer (99) - : +- ShuffleQueryStage (97), Statistics(X) - : +- ColumnarExchange (96) - : +- VeloxAppendBatches (95) - : +- ^ ProjectExecTransformer (93) - : +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (92) - : :- ^ InputIteratorTransformer (87) - : : +- ShuffleQueryStage (85), Statistics(X) - : : +- ColumnarExchange (84) - : : +- VeloxAppendBatches (83) - : : +- ^ ProjectExecTransformer (81) - : : +- ^ NoopFilter (80) - : : +- ^ Scan parquet (79) - : +- ^ InputIteratorTransformer (91) - : +- ShuffleQueryStage (89), Statistics(X) - : +- ReusedExchange (88) - +- ^ InputIteratorTransformer (103) - +- ShuffleQueryStage (101), Statistics(X) - +- ReusedExchange (100) + VeloxColumnarToRowExec (114) + +- ^ ProjectExecTransformer (112) + +- ^ RegularHashAggregateExecTransformer (111) + +- ^ RegularHashAggregateExecTransformer (110) + +- ^ ProjectExecTransformer (109) + +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (108) + :- ^ InputIteratorTransformer (103) + : +- ShuffleQueryStage (101), Statistics(X) + : +- ColumnarExchange (100) + : +- VeloxAppendBatches (99) + : +- ^ ProjectExecTransformer (97) + : +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (96) + : :- ^ InputIteratorTransformer (91) + : : +- ShuffleQueryStage (89), Statistics(X) + : : +- ColumnarExchange (88) + : : +- VeloxAppendBatches (87) + : : +- ^ ProjectExecTransformer (85) + : : +- ^ NoopFilter (84) + : : +- ^ Scan parquet (83) + : +- ^ InputIteratorTransformer (95) + : +- ShuffleQueryStage (93), Statistics(X) + : +- ReusedExchange (92) + +- ^ InputIteratorTransformer (107) + +- ShuffleQueryStage (105), Statistics(X) + +- ReusedExchange (104) +- == Initial Plan == - HashAggregate (127) - +- HashAggregate (126) - +- Project (125) - +- ShuffledHashJoin Inner BuildRight (124) - :- Exchange (119) - : +- Project (118) - : +- ShuffledHashJoin Inner BuildRight (117) - : :- Exchange (113) - : : +- Filter (112) - : : +- Scan parquet (111) - : +- Exchange (116) - : +- Filter (115) - : +- Scan parquet (114) - +- Exchange (123) - +- Project (122) - +- Filter (121) - +- Scan parquet (120) - - -(79) Scan parquet + HashAggregate (135) + +- HashAggregate (134) + +- Project (133) + +- SortMergeJoin Inner (132) + :- Sort (126) + : +- Exchange (125) + : +- Project (124) + : +- SortMergeJoin Inner (123) + : :- Sort (118) + : : +- Exchange (117) + : : +- Filter (116) + : : +- Scan parquet (115) + : +- Sort (122) + : +- Exchange (121) + : +- Filter (120) + : +- Scan parquet (119) + +- Sort (131) + +- Exchange (130) + +- Project (129) + +- Filter (128) + +- Scan parquet (127) + + +(83) Scan parquet Output [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(ps_suppkey)] ReadSchema: struct -(80) NoopFilter +(84) NoopFilter Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] -(81) ProjectExecTransformer +(85) ProjectExecTransformer Output [4]: [hash(ps_suppkey#X, 42) AS hash_partition_key#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] -(82) WholeStageCodegenTransformer (X) +(86) WholeStageCodegenTransformer (X) Input [4]: [hash_partition_key#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: false -(83) VeloxAppendBatches +(87) VeloxAppendBatches Input [4]: [hash_partition_key#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: X -(84) ColumnarExchange +(88) ColumnarExchange Input [4]: [hash_partition_key#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [ps_suppkey#X, ps_availqty#X, ps_supplycost#X], [plan_id=X], [id=#X] -(85) ShuffleQueryStage +(89) ShuffleQueryStage Output [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: X -(86) InputAdapter +(90) InputAdapter Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] -(87) InputIteratorTransformer +(91) InputIteratorTransformer Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] -(88) ReusedExchange [Reuses operator id: 15] +(92) ReusedExchange [Reuses operator id: 15] Output [2]: [s_suppkey#X, s_nationkey#X] -(89) ShuffleQueryStage +(93) ShuffleQueryStage Output [2]: [s_suppkey#X, s_nationkey#X] Arguments: X -(90) InputAdapter +(94) InputAdapter Input [2]: [s_suppkey#X, s_nationkey#X] -(91) InputIteratorTransformer +(95) InputIteratorTransformer Input [2]: [s_suppkey#X, s_nationkey#X] -(92) ShuffledHashJoinExecTransformer +(96) ShuffledHashJoinExecTransformer Left keys [1]: [ps_suppkey#X] Right keys [1]: [s_suppkey#X] Join condition: None -(93) ProjectExecTransformer +(97) ProjectExecTransformer Output [4]: [hash(s_nationkey#X, 42) AS hash_partition_key#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] Input [5]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X, s_suppkey#X, s_nationkey#X] -(94) WholeStageCodegenTransformer (X) +(98) WholeStageCodegenTransformer (X) Input [4]: [hash_partition_key#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] Arguments: false -(95) VeloxAppendBatches +(99) VeloxAppendBatches Input [4]: [hash_partition_key#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] Arguments: X -(96) ColumnarExchange +(100) ColumnarExchange Input [4]: [hash_partition_key#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [ps_availqty#X, ps_supplycost#X, s_nationkey#X], [plan_id=X], [id=#X] -(97) ShuffleQueryStage +(101) ShuffleQueryStage Output [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] Arguments: X -(98) InputAdapter +(102) InputAdapter Input [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] -(99) InputIteratorTransformer +(103) InputIteratorTransformer Input [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] -(100) ReusedExchange [Reuses operator id: 32] +(104) ReusedExchange [Reuses operator id: 32] Output [1]: [n_nationkey#X] -(101) ShuffleQueryStage +(105) ShuffleQueryStage Output [1]: [n_nationkey#X] Arguments: X -(102) InputAdapter +(106) InputAdapter Input [1]: [n_nationkey#X] -(103) InputIteratorTransformer +(107) InputIteratorTransformer Input [1]: [n_nationkey#X] -(104) ShuffledHashJoinExecTransformer +(108) ShuffledHashJoinExecTransformer Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(105) ProjectExecTransformer +(109) ProjectExecTransformer Output [3]: [ps_availqty#X, ps_supplycost#X, CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)) AS _pre_X#X] Input [4]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X, n_nationkey#X] -(106) RegularHashAggregateExecTransformer +(110) RegularHashAggregateExecTransformer Input [3]: [ps_availqty#X, ps_supplycost#X, _pre_X#X] Keys: [] Functions [1]: [partial_sum(_pre_X#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [2]: [sum#X, isEmpty#X] -(107) RegularHashAggregateExecTransformer +(111) RegularHashAggregateExecTransformer Input [2]: [sum#X, isEmpty#X] Keys: [] Functions [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))#X] Results [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))#X] -(108) ProjectExecTransformer +(112) ProjectExecTransformer Output [1]: [CheckOverflow((promote_precision(cast(sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))#X as decimal(38,10))) * 0.0001000000), DecimalType(38,6)) AS (sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] Input [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))#X] -(109) WholeStageCodegenTransformer (X) +(113) WholeStageCodegenTransformer (X) Input [1]: [(sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] Arguments: false -(110) VeloxColumnarToRowExec +(114) VeloxColumnarToRowExec Input [1]: [(sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] -(111) Scan parquet +(115) Scan parquet Output [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(ps_suppkey)] ReadSchema: struct -(112) Filter +(116) Filter Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Condition : isnotnull(ps_suppkey#X) -(113) Exchange +(117) Exchange Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(114) Scan parquet +(118) Sort +Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] +Arguments: [ps_suppkey#X ASC NULLS FIRST], false, 0 + +(119) Scan parquet Output [2]: [s_suppkey#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct -(115) Filter +(120) Filter Input [2]: [s_suppkey#X, s_nationkey#X] Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) -(116) Exchange +(121) Exchange Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(117) ShuffledHashJoin +(122) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(123) SortMergeJoin Left keys [1]: [ps_suppkey#X] Right keys [1]: [s_suppkey#X] Join condition: None -(118) Project +(124) Project Output [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] Input [5]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X, s_suppkey#X, s_nationkey#X] -(119) Exchange +(125) Exchange Input [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(120) Scan parquet +(126) Sort +Input [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(127) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,GERMANY), IsNotNull(n_nationkey)] ReadSchema: struct -(121) Filter +(128) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = GERMANY)) AND isnotnull(n_nationkey#X)) -(122) Project +(129) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(123) Exchange +(130) Exchange Input [1]: [n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(124) ShuffledHashJoin +(131) Sort +Input [1]: [n_nationkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(132) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(125) Project +(133) Project Output [2]: [ps_availqty#X, ps_supplycost#X] Input [4]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X, n_nationkey#X] -(126) HashAggregate +(134) HashAggregate Input [2]: [ps_availqty#X, ps_supplycost#X] Keys: [] Functions [1]: [partial_sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [2]: [sum#X, isEmpty#X] -(127) HashAggregate +(135) HashAggregate Input [2]: [sum#X, isEmpty#X] Keys: [] Functions [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))#X] Results [1]: [CheckOverflow((promote_precision(cast(sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))#X as decimal(38,10))) * 0.0001000000), DecimalType(38,6)) AS (sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] -(128) AdaptiveSparkPlan +(136) AdaptiveSparkPlan Output [1]: [(sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/12.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/12.txt index dd1259eb8876..b0f084e2d048 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/12.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/12.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (53) +AdaptiveSparkPlan (55) +- == Final Plan == VeloxColumnarToRowExec (38) +- ^ SortExecTransformer (36) @@ -31,20 +31,22 @@ AdaptiveSparkPlan (53) +- ^ NoopFilter (11) +- ^ Scan parquet (10) +- == Initial Plan == - Sort (52) - +- Exchange (51) - +- HashAggregate (50) - +- Exchange (49) - +- HashAggregate (48) - +- Project (47) - +- ShuffledHashJoin Inner BuildLeft (46) - :- Exchange (41) - : +- Filter (40) - : +- Scan parquet (39) - +- Exchange (45) - +- Project (44) - +- Filter (43) - +- Scan parquet (42) + Sort (54) + +- Exchange (53) + +- HashAggregate (52) + +- Exchange (51) + +- HashAggregate (50) + +- Project (49) + +- SortMergeJoin Inner (48) + :- Sort (42) + : +- Exchange (41) + : +- Filter (40) + : +- Scan parquet (39) + +- Sort (47) + +- Exchange (46) + +- Project (45) + +- Filter (44) + +- Scan parquet (43) (1) Scan parquet @@ -218,60 +220,68 @@ Condition : isnotnull(o_orderkey#X) Input [2]: [o_orderkey#X, o_orderpriority#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(42) Scan parquet +(42) Sort +Input [2]: [o_orderkey#X, o_orderpriority#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(43) Scan parquet Output [5]: [l_orderkey#X, l_shipdate#X, l_commitdate#X, l_receiptdate#X, l_shipmode#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_commitdate), IsNotNull(l_receiptdate), IsNotNull(l_shipdate), In(l_shipmode, [MAIL,SHIP]), GreaterThanOrEqual(l_receiptdate,1994-01-01), LessThan(l_receiptdate,1995-01-01), IsNotNull(l_orderkey)] ReadSchema: struct -(43) Filter +(44) Filter Input [5]: [l_orderkey#X, l_shipdate#X, l_commitdate#X, l_receiptdate#X, l_shipmode#X] Condition : ((((((((isnotnull(l_commitdate#X) AND isnotnull(l_receiptdate#X)) AND isnotnull(l_shipdate#X)) AND l_shipmode#X IN (MAIL,SHIP)) AND (l_commitdate#X < l_receiptdate#X)) AND (l_shipdate#X < l_commitdate#X)) AND (l_receiptdate#X >= 1994-01-01)) AND (l_receiptdate#X < 1995-01-01)) AND isnotnull(l_orderkey#X)) -(44) Project +(45) Project Output [2]: [l_orderkey#X, l_shipmode#X] Input [5]: [l_orderkey#X, l_shipdate#X, l_commitdate#X, l_receiptdate#X, l_shipmode#X] -(45) Exchange +(46) Exchange Input [2]: [l_orderkey#X, l_shipmode#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(46) ShuffledHashJoin +(47) Sort +Input [2]: [l_orderkey#X, l_shipmode#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(48) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(47) Project +(49) Project Output [2]: [o_orderpriority#X, l_shipmode#X] Input [4]: [o_orderkey#X, o_orderpriority#X, l_orderkey#X, l_shipmode#X] -(48) HashAggregate +(50) HashAggregate Input [2]: [o_orderpriority#X, l_shipmode#X] Keys [1]: [l_shipmode#X] Functions [2]: [partial_sum(CASE WHEN ((o_orderpriority#X = 1-URGENT) OR (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (NOT (o_orderpriority#X = 1-URGENT) AND NOT (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)] Aggregate Attributes [2]: [sum#X, sum#X] Results [3]: [l_shipmode#X, sum#X, sum#X] -(49) Exchange +(51) Exchange Input [3]: [l_shipmode#X, sum#X, sum#X] Arguments: hashpartitioning(l_shipmode#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(50) HashAggregate +(52) HashAggregate Input [3]: [l_shipmode#X, sum#X, sum#X] Keys [1]: [l_shipmode#X] Functions [2]: [sum(CASE WHEN ((o_orderpriority#X = 1-URGENT) OR (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END), sum(CASE WHEN (NOT (o_orderpriority#X = 1-URGENT) AND NOT (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)] Aggregate Attributes [2]: [sum(CASE WHEN ((o_orderpriority#X = 1-URGENT) OR (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)#X, sum(CASE WHEN (NOT (o_orderpriority#X = 1-URGENT) AND NOT (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)#X] Results [3]: [l_shipmode#X, sum(CASE WHEN ((o_orderpriority#X = 1-URGENT) OR (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)#X AS high_line_count#X, sum(CASE WHEN (NOT (o_orderpriority#X = 1-URGENT) AND NOT (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)#X AS low_line_count#X] -(51) Exchange +(53) Exchange Input [3]: [l_shipmode#X, high_line_count#X, low_line_count#X] Arguments: rangepartitioning(l_shipmode#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(52) Sort +(54) Sort Input [3]: [l_shipmode#X, high_line_count#X, low_line_count#X] Arguments: [l_shipmode#X ASC NULLS FIRST], true, 0 -(53) AdaptiveSparkPlan +(55) AdaptiveSparkPlan Output [3]: [l_shipmode#X, high_line_count#X, low_line_count#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/13.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/13.txt index d43ad2a9c271..07c32ff95fb1 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/13.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/13.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (56) +AdaptiveSparkPlan (58) +- == Final Plan == VeloxColumnarToRowExec (40) +- ^ SortExecTransformer (38) @@ -33,21 +33,23 @@ AdaptiveSparkPlan (56) +- ^ NoopFilter (10) +- ^ Scan parquet (9) +- == Initial Plan == - Sort (55) - +- Exchange (54) - +- HashAggregate (53) - +- Exchange (52) - +- HashAggregate (51) - +- HashAggregate (50) - +- HashAggregate (49) - +- Project (48) - +- ShuffledHashJoin LeftOuter BuildRight (47) - :- Exchange (42) - : +- Scan parquet (41) - +- Exchange (46) - +- Project (45) - +- Filter (44) - +- Scan parquet (43) + Sort (57) + +- Exchange (56) + +- HashAggregate (55) + +- Exchange (54) + +- HashAggregate (53) + +- HashAggregate (52) + +- HashAggregate (51) + +- Project (50) + +- SortMergeJoin LeftOuter (49) + :- Sort (43) + : +- Exchange (42) + : +- Scan parquet (41) + +- Sort (48) + +- Exchange (47) + +- Project (46) + +- Filter (45) + +- Scan parquet (44) (1) Scan parquet @@ -229,74 +231,82 @@ ReadSchema: struct Input [1]: [c_custkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(43) Scan parquet +(43) Sort +Input [1]: [c_custkey#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(44) Scan parquet Output [3]: [o_orderkey#X, o_custkey#X, o_comment#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_comment), IsNotNull(o_custkey)] ReadSchema: struct -(44) Filter +(45) Filter Input [3]: [o_orderkey#X, o_custkey#X, o_comment#X] Condition : ((isnotnull(o_comment#X) AND NOT o_comment#X LIKE %special%requests%) AND isnotnull(o_custkey#X)) -(45) Project +(46) Project Output [2]: [o_orderkey#X, o_custkey#X] Input [3]: [o_orderkey#X, o_custkey#X, o_comment#X] -(46) Exchange +(47) Exchange Input [2]: [o_orderkey#X, o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(47) ShuffledHashJoin +(48) Sort +Input [2]: [o_orderkey#X, o_custkey#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(49) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join condition: None -(48) Project +(50) Project Output [2]: [c_custkey#X, o_orderkey#X] Input [3]: [c_custkey#X, o_orderkey#X, o_custkey#X] -(49) HashAggregate +(51) HashAggregate Input [2]: [c_custkey#X, o_orderkey#X] Keys [1]: [c_custkey#X] Functions [1]: [partial_count(o_orderkey#X)] Aggregate Attributes [1]: [count#X] Results [2]: [c_custkey#X, count#X] -(50) HashAggregate +(52) HashAggregate Input [2]: [c_custkey#X, count#X] Keys [1]: [c_custkey#X] Functions [1]: [count(o_orderkey#X)] Aggregate Attributes [1]: [count(o_orderkey#X)#X] Results [1]: [count(o_orderkey#X)#X AS c_count#X] -(51) HashAggregate +(53) HashAggregate Input [1]: [c_count#X] Keys [1]: [c_count#X] Functions [1]: [partial_count(1)] Aggregate Attributes [1]: [count#X] Results [2]: [c_count#X, count#X] -(52) Exchange +(54) Exchange Input [2]: [c_count#X, count#X] Arguments: hashpartitioning(c_count#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(53) HashAggregate +(55) HashAggregate Input [2]: [c_count#X, count#X] Keys [1]: [c_count#X] Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#X] Results [2]: [c_count#X, count(1)#X AS custdist#X] -(54) Exchange +(56) Exchange Input [2]: [c_count#X, custdist#X] Arguments: rangepartitioning(custdist#X DESC NULLS LAST, c_count#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(55) Sort +(57) Sort Input [2]: [c_count#X, custdist#X] Arguments: [custdist#X DESC NULLS LAST, c_count#X DESC NULLS LAST], true, 0 -(56) AdaptiveSparkPlan +(58) AdaptiveSparkPlan Output [2]: [c_count#X, custdist#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/14.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/14.txt index cb3ddbb3a2f7..11bbb2a71e79 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/14.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/14.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (37) +AdaptiveSparkPlan (39) +- == Final Plan == VeloxColumnarToRowExec (25) +- ^ ProjectExecTransformer (23) @@ -22,17 +22,19 @@ AdaptiveSparkPlan (37) +- ^ NoopFilter (11) +- ^ Scan parquet (10) +- == Initial Plan == - HashAggregate (36) - +- HashAggregate (35) - +- Project (34) - +- ShuffledHashJoin Inner BuildRight (33) - :- Exchange (29) - : +- Project (28) - : +- Filter (27) - : +- Scan parquet (26) - +- Exchange (32) - +- Filter (31) - +- Scan parquet (30) + HashAggregate (38) + +- HashAggregate (37) + +- Project (36) + +- SortMergeJoin Inner (35) + :- Sort (30) + : +- Exchange (29) + : +- Project (28) + : +- Filter (27) + : +- Scan parquet (26) + +- Sort (34) + +- Exchange (33) + +- Filter (32) + +- Scan parquet (31) (1) Scan parquet @@ -162,44 +164,52 @@ Input [4]: [l_partkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Input [3]: [l_partkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(30) Scan parquet +(30) Sort +Input [3]: [l_partkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(31) Scan parquet Output [2]: [p_partkey#X, p_type#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_partkey)] ReadSchema: struct -(31) Filter +(32) Filter Input [2]: [p_partkey#X, p_type#X] Condition : isnotnull(p_partkey#X) -(32) Exchange +(33) Exchange Input [2]: [p_partkey#X, p_type#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(33) ShuffledHashJoin +(34) Sort +Input [2]: [p_partkey#X, p_type#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(35) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(34) Project +(36) Project Output [3]: [l_extendedprice#X, l_discount#X, p_type#X] Input [5]: [l_partkey#X, l_extendedprice#X, l_discount#X, p_partkey#X, p_type#X] -(35) HashAggregate +(37) HashAggregate Input [3]: [l_extendedprice#X, l_discount#X, p_type#X] Keys: [] Functions [2]: [partial_sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)) ELSE 0.0000 END), partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] Results [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] -(36) HashAggregate +(38) HashAggregate Input [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] Keys: [] Functions [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)) ELSE 0.0000 END), sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)) ELSE 0.0000 END)#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] Results [1]: [CheckOverflow((promote_precision(CheckOverflow((100.0000 * promote_precision(sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)) ELSE 0.0000 END)#X)), DecimalType(38,6))) / promote_precision(cast(sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X as decimal(38,6)))), DecimalType(38,6)) AS promo_revenue#X] -(37) AdaptiveSparkPlan +(39) AdaptiveSparkPlan Output [1]: [promo_revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/15.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/15.txt index 60521d6c62cd..be97f58cf438 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/15.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/15.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (45) +AdaptiveSparkPlan (47) +- == Final Plan == VeloxColumnarToRowExec (30) +- AQEShuffleRead (29) @@ -27,20 +27,22 @@ AdaptiveSparkPlan (45) +- ^ NoopFilter (11) +- ^ Scan parquet (10) +- == Initial Plan == - Sort (44) - +- Exchange (43) - +- Project (42) - +- ShuffledHashJoin Inner BuildLeft (41) - :- Exchange (33) - : +- Filter (32) - : +- Scan parquet (31) - +- Filter (40) - +- HashAggregate (39) - +- Exchange (38) - +- HashAggregate (37) - +- Project (36) - +- Filter (35) - +- Scan parquet (34) + Sort (46) + +- Exchange (45) + +- Project (44) + +- SortMergeJoin Inner (43) + :- Sort (34) + : +- Exchange (33) + : +- Filter (32) + : +- Scan parquet (31) + +- Sort (42) + +- Filter (41) + +- HashAggregate (40) + +- Exchange (39) + +- HashAggregate (38) + +- Project (37) + +- Filter (36) + +- Scan parquet (35) (1) Scan parquet @@ -186,221 +188,229 @@ Condition : isnotnull(s_suppkey#X) Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(34) Scan parquet +(34) Sort +Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(35) Scan parquet Output [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1996-01-01), LessThan(l_shipdate,1996-04-01), IsNotNull(l_suppkey)] ReadSchema: struct -(35) Filter +(36) Filter Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : (((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1996-01-01)) AND (l_shipdate#X < 1996-04-01)) AND isnotnull(l_suppkey#X)) -(36) Project +(37) Project Output [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(37) HashAggregate +(38) HashAggregate Input [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] Keys [1]: [l_suppkey#X] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_suppkey#X, sum#X, isEmpty#X] -(38) Exchange +(39) Exchange Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(39) HashAggregate +(40) HashAggregate Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Keys [1]: [l_suppkey#X] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] Results [2]: [l_suppkey#X AS supplier_no#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X AS total_revenue#X] -(40) Filter +(41) Filter Input [2]: [supplier_no#X, total_revenue#X] Condition : (isnotnull(total_revenue#X) AND (total_revenue#X = Subquery subquery#X, [id=#X])) -(41) ShuffledHashJoin +(42) Sort +Input [2]: [supplier_no#X, total_revenue#X] +Arguments: [supplier_no#X ASC NULLS FIRST], false, 0 + +(43) SortMergeJoin Left keys [1]: [s_suppkey#X] Right keys [1]: [supplier_no#X] Join condition: None -(42) Project +(44) Project Output [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] Input [6]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, supplier_no#X, total_revenue#X] -(43) Exchange +(45) Exchange Input [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] Arguments: rangepartitioning(s_suppkey#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(44) Sort +(46) Sort Input [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] Arguments: [s_suppkey#X ASC NULLS FIRST], true, 0 -(45) AdaptiveSparkPlan +(47) AdaptiveSparkPlan Output [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] Arguments: isFinalPlan=true ===== Subqueries ===== Subquery:1 Hosting operator id = 22 Hosting Expression = Subquery subquery#X, [id=#X] -AdaptiveSparkPlan (71) +AdaptiveSparkPlan (73) +- == Final Plan == - VeloxColumnarToRowExec (62) - +- ^ RegularHashAggregateExecTransformer (60) - +- ^ RegularHashAggregateExecTransformer (59) - +- ^ ProjectExecTransformer (58) - +- ^ RegularHashAggregateExecTransformer (57) - +- ^ InputIteratorTransformer (56) - +- ShuffleQueryStage (54), Statistics(X) - +- ColumnarExchange (53) - +- VeloxAppendBatches (52) - +- ^ ProjectExecTransformer (50) - +- ^ FlushableHashAggregateExecTransformer (49) - +- ^ ProjectExecTransformer (48) - +- ^ NoopFilter (47) - +- ^ Scan parquet (46) + VeloxColumnarToRowExec (64) + +- ^ RegularHashAggregateExecTransformer (62) + +- ^ RegularHashAggregateExecTransformer (61) + +- ^ ProjectExecTransformer (60) + +- ^ RegularHashAggregateExecTransformer (59) + +- ^ InputIteratorTransformer (58) + +- ShuffleQueryStage (56), Statistics(X) + +- ColumnarExchange (55) + +- VeloxAppendBatches (54) + +- ^ ProjectExecTransformer (52) + +- ^ FlushableHashAggregateExecTransformer (51) + +- ^ ProjectExecTransformer (50) + +- ^ NoopFilter (49) + +- ^ Scan parquet (48) +- == Initial Plan == - HashAggregate (70) - +- HashAggregate (69) - +- HashAggregate (68) - +- Exchange (67) - +- HashAggregate (66) - +- Project (65) - +- Filter (64) - +- Scan parquet (63) + HashAggregate (72) + +- HashAggregate (71) + +- HashAggregate (70) + +- Exchange (69) + +- HashAggregate (68) + +- Project (67) + +- Filter (66) + +- Scan parquet (65) -(46) Scan parquet +(48) Scan parquet Output [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1996-01-01), LessThan(l_shipdate,1996-04-01)] ReadSchema: struct -(47) NoopFilter +(49) NoopFilter Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Arguments: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(48) ProjectExecTransformer +(50) ProjectExecTransformer Output [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)) AS _pre_X#X] Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(49) FlushableHashAggregateExecTransformer +(51) FlushableHashAggregateExecTransformer Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, _pre_X#X] Keys [1]: [l_suppkey#X] Functions [1]: [partial_sum(_pre_X#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_suppkey#X, sum#X, isEmpty#X] -(50) ProjectExecTransformer +(52) ProjectExecTransformer Output [4]: [hash(l_suppkey#X, 42) AS hash_partition_key#X, l_suppkey#X, sum#X, isEmpty#X] Input [3]: [l_suppkey#X, sum#X, isEmpty#X] -(51) WholeStageCodegenTransformer (X) +(53) WholeStageCodegenTransformer (X) Input [4]: [hash_partition_key#X, l_suppkey#X, sum#X, isEmpty#X] Arguments: false -(52) VeloxAppendBatches +(54) VeloxAppendBatches Input [4]: [hash_partition_key#X, l_suppkey#X, sum#X, isEmpty#X] Arguments: X -(53) ColumnarExchange +(55) ColumnarExchange Input [4]: [hash_partition_key#X, l_suppkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [l_suppkey#X, sum#X, isEmpty#X], [plan_id=X], [id=#X] -(54) ShuffleQueryStage +(56) ShuffleQueryStage Output [3]: [l_suppkey#X, sum#X, isEmpty#X] Arguments: X -(55) InputAdapter +(57) InputAdapter Input [3]: [l_suppkey#X, sum#X, isEmpty#X] -(56) InputIteratorTransformer +(58) InputIteratorTransformer Input [3]: [l_suppkey#X, sum#X, isEmpty#X] -(57) RegularHashAggregateExecTransformer +(59) RegularHashAggregateExecTransformer Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Keys [1]: [l_suppkey#X] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] Results [2]: [l_suppkey#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] -(58) ProjectExecTransformer +(60) ProjectExecTransformer Output [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X AS total_revenue#X] Input [2]: [l_suppkey#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] -(59) RegularHashAggregateExecTransformer +(61) RegularHashAggregateExecTransformer Input [1]: [total_revenue#X] Keys: [] Functions [1]: [partial_max(total_revenue#X)] Aggregate Attributes [1]: [max#X] Results [1]: [max#X] -(60) RegularHashAggregateExecTransformer +(62) RegularHashAggregateExecTransformer Input [1]: [max#X] Keys: [] Functions [1]: [max(total_revenue#X)] Aggregate Attributes [1]: [max(total_revenue#X)#X] Results [1]: [max(total_revenue#X)#X AS max(total_revenue)#X] -(61) WholeStageCodegenTransformer (X) +(63) WholeStageCodegenTransformer (X) Input [1]: [max(total_revenue)#X] Arguments: false -(62) VeloxColumnarToRowExec +(64) VeloxColumnarToRowExec Input [1]: [max(total_revenue)#X] -(63) Scan parquet +(65) Scan parquet Output [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1996-01-01), LessThan(l_shipdate,1996-04-01)] ReadSchema: struct -(64) Filter +(66) Filter Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : ((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1996-01-01)) AND (l_shipdate#X < 1996-04-01)) -(65) Project +(67) Project Output [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(66) HashAggregate +(68) HashAggregate Input [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] Keys [1]: [l_suppkey#X] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_suppkey#X, sum#X, isEmpty#X] -(67) Exchange +(69) Exchange Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(68) HashAggregate +(70) HashAggregate Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Keys [1]: [l_suppkey#X] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] Results [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X AS total_revenue#X] -(69) HashAggregate +(71) HashAggregate Input [1]: [total_revenue#X] Keys: [] Functions [1]: [partial_max(total_revenue#X)] Aggregate Attributes [1]: [max#X] Results [1]: [max#X] -(70) HashAggregate +(72) HashAggregate Input [1]: [max#X] Keys: [] Functions [1]: [max(total_revenue#X)] Aggregate Attributes [1]: [max(total_revenue#X)#X] Results [1]: [max(total_revenue#X)#X AS max(total_revenue)#X] -(71) AdaptiveSparkPlan +(73) AdaptiveSparkPlan Output [1]: [max(total_revenue)#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/16.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/16.txt index 029516a40506..86d2f321f653 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/16.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/16.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (69) +AdaptiveSparkPlan (71) +- == Final Plan == VeloxColumnarToRowExec (47) +- ^ SortExecTransformer (45) @@ -38,27 +38,29 @@ AdaptiveSparkPlan (69) +- ^ NoopFilter (11) +- ^ Scan parquet (10) +- == Initial Plan == - Sort (68) - +- Exchange (67) - +- HashAggregate (66) - +- Exchange (65) - +- HashAggregate (64) - +- HashAggregate (63) - +- Exchange (62) - +- HashAggregate (61) - +- Project (60) - +- ShuffledHashJoin Inner BuildRight (59) - :- Exchange (55) - : +- BroadcastHashJoin LeftAnti BuildRight (54) - : :- Filter (49) - : : +- Scan parquet (48) - : +- BroadcastExchange (53) - : +- Project (52) - : +- Filter (51) - : +- Scan parquet (50) - +- Exchange (58) - +- Filter (57) - +- Scan parquet (56) + Sort (70) + +- Exchange (69) + +- HashAggregate (68) + +- Exchange (67) + +- HashAggregate (66) + +- HashAggregate (65) + +- Exchange (64) + +- HashAggregate (63) + +- Project (62) + +- SortMergeJoin Inner (61) + :- Sort (56) + : +- Exchange (55) + : +- BroadcastHashJoin LeftAnti BuildRight (54) + : :- Filter (49) + : : +- Scan parquet (48) + : +- BroadcastExchange (53) + : +- Project (52) + : +- Filter (51) + : +- Scan parquet (50) + +- Sort (60) + +- Exchange (59) + +- Filter (58) + +- Scan parquet (57) (1) Scan parquet @@ -296,74 +298,82 @@ Join condition: None Input [2]: [ps_partkey#X, ps_suppkey#X] Arguments: hashpartitioning(ps_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(56) Scan parquet +(56) Sort +Input [2]: [ps_partkey#X, ps_suppkey#X] +Arguments: [ps_partkey#X ASC NULLS FIRST], false, 0 + +(57) Scan parquet Output [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_brand), IsNotNull(p_type), Not(EqualTo(p_brand,Brand#X)), Not(StringStartsWith(p_type,MEDIUM POLISHED)), In(p_size, [14,19,23,3,36,45,49,9]), IsNotNull(p_partkey)] ReadSchema: struct -(57) Filter +(58) Filter Input [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] Condition : (((((isnotnull(p_brand#X) AND isnotnull(p_type#X)) AND NOT (p_brand#X = Brand#X)) AND NOT StartsWith(p_type#X, MEDIUM POLISHED)) AND p_size#X IN (49,14,23,45,19,3,36,9)) AND isnotnull(p_partkey#X)) -(58) Exchange +(59) Exchange Input [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(59) ShuffledHashJoin +(60) Sort +Input [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(61) SortMergeJoin Left keys [1]: [ps_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(60) Project +(62) Project Output [4]: [ps_suppkey#X, p_brand#X, p_type#X, p_size#X] Input [6]: [ps_partkey#X, ps_suppkey#X, p_partkey#X, p_brand#X, p_type#X, p_size#X] -(61) HashAggregate +(63) HashAggregate Input [4]: [ps_suppkey#X, p_brand#X, p_type#X, p_size#X] Keys [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] Functions: [] Aggregate Attributes: [] Results [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] -(62) Exchange +(64) Exchange Input [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] Arguments: hashpartitioning(p_brand#X, p_type#X, p_size#X, ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(63) HashAggregate +(65) HashAggregate Input [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] Keys [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] Functions: [] Aggregate Attributes: [] Results [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] -(64) HashAggregate +(66) HashAggregate Input [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] Keys [3]: [p_brand#X, p_type#X, p_size#X] Functions [1]: [partial_count(distinct ps_suppkey#X)] Aggregate Attributes [1]: [count(ps_suppkey#X)#X] Results [4]: [p_brand#X, p_type#X, p_size#X, count#X] -(65) Exchange +(67) Exchange Input [4]: [p_brand#X, p_type#X, p_size#X, count#X] Arguments: hashpartitioning(p_brand#X, p_type#X, p_size#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(66) HashAggregate +(68) HashAggregate Input [4]: [p_brand#X, p_type#X, p_size#X, count#X] Keys [3]: [p_brand#X, p_type#X, p_size#X] Functions [1]: [count(distinct ps_suppkey#X)] Aggregate Attributes [1]: [count(ps_suppkey#X)#X] Results [4]: [p_brand#X, p_type#X, p_size#X, count(ps_suppkey#X)#X AS supplier_cnt#X] -(67) Exchange +(69) Exchange Input [4]: [p_brand#X, p_type#X, p_size#X, supplier_cnt#X] Arguments: rangepartitioning(supplier_cnt#X DESC NULLS LAST, p_brand#X ASC NULLS FIRST, p_type#X ASC NULLS FIRST, p_size#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(68) Sort +(70) Sort Input [4]: [p_brand#X, p_type#X, p_size#X, supplier_cnt#X] Arguments: [supplier_cnt#X DESC NULLS LAST, p_brand#X ASC NULLS FIRST, p_type#X ASC NULLS FIRST, p_size#X ASC NULLS FIRST], true, 0 -(69) AdaptiveSparkPlan +(71) AdaptiveSparkPlan Output [4]: [p_brand#X, p_type#X, p_size#X, supplier_cnt#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/17.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/17.txt index e2d1503799a9..6a2e47576cad 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/17.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/17.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (60) +AdaptiveSparkPlan (63) +- == Final Plan == VeloxColumnarToRowExec (40) +- ^ ProjectExecTransformer (38) @@ -35,25 +35,28 @@ AdaptiveSparkPlan (60) +- ^ NoopFilter (22) +- ^ Scan parquet (21) +- == Initial Plan == - HashAggregate (59) - +- HashAggregate (58) - +- Project (57) - +- ShuffledHashJoin Inner BuildRight (56) - :- Project (49) - : +- ShuffledHashJoin Inner BuildRight (48) - : :- Exchange (43) - : : +- Filter (42) - : : +- Scan parquet (41) - : +- Exchange (47) - : +- Project (46) - : +- Filter (45) - : +- Scan parquet (44) - +- Filter (55) - +- HashAggregate (54) - +- Exchange (53) - +- HashAggregate (52) - +- Filter (51) - +- Scan parquet (50) + HashAggregate (62) + +- HashAggregate (61) + +- Project (60) + +- SortMergeJoin Inner (59) + :- Project (51) + : +- SortMergeJoin Inner (50) + : :- Sort (44) + : : +- Exchange (43) + : : +- Filter (42) + : : +- Scan parquet (41) + : +- Sort (49) + : +- Exchange (48) + : +- Project (47) + : +- Filter (46) + : +- Scan parquet (45) + +- Sort (58) + +- Filter (57) + +- HashAggregate (56) + +- Exchange (55) + +- HashAggregate (54) + +- Filter (53) + +- Scan parquet (52) (1) Scan parquet @@ -247,90 +250,102 @@ Condition : (isnotnull(l_partkey#X) AND isnotnull(l_quantity#X)) Input [3]: [l_partkey#X, l_quantity#X, l_extendedprice#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(44) Scan parquet +(44) Sort +Input [3]: [l_partkey#X, l_quantity#X, l_extendedprice#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(45) Scan parquet Output [3]: [p_partkey#X, p_brand#X, p_container#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_brand), IsNotNull(p_container), EqualTo(p_brand,Brand#X), EqualTo(p_container,MED BOX), IsNotNull(p_partkey)] ReadSchema: struct -(45) Filter +(46) Filter Input [3]: [p_partkey#X, p_brand#X, p_container#X] Condition : ((((isnotnull(p_brand#X) AND isnotnull(p_container#X)) AND (p_brand#X = Brand#X)) AND (p_container#X = MED BOX)) AND isnotnull(p_partkey#X)) -(46) Project +(47) Project Output [1]: [p_partkey#X] Input [3]: [p_partkey#X, p_brand#X, p_container#X] -(47) Exchange +(48) Exchange Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(48) ShuffledHashJoin +(49) Sort +Input [1]: [p_partkey#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(50) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(49) Project +(51) Project Output [3]: [l_quantity#X, l_extendedprice#X, p_partkey#X] Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, p_partkey#X] -(50) Scan parquet +(52) Scan parquet Output [2]: [l_partkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_partkey)] ReadSchema: struct -(51) Filter +(53) Filter Input [2]: [l_partkey#X, l_quantity#X] Condition : isnotnull(l_partkey#X) -(52) HashAggregate +(54) HashAggregate Input [2]: [l_partkey#X, l_quantity#X] Keys [1]: [l_partkey#X] Functions [1]: [partial_avg(l_quantity#X)] Aggregate Attributes [2]: [sum#X, count#X] Results [3]: [l_partkey#X, sum#X, count#X] -(53) Exchange +(55) Exchange Input [3]: [l_partkey#X, sum#X, count#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(54) HashAggregate +(56) HashAggregate Input [3]: [l_partkey#X, sum#X, count#X] Keys [1]: [l_partkey#X] Functions [1]: [avg(l_quantity#X)] Aggregate Attributes [1]: [avg(l_quantity#X)#X] Results [2]: [CheckOverflow((0.200000 * promote_precision(avg(l_quantity#X)#X)), DecimalType(18,7)) AS (0.2 * avg(l_quantity))#X, l_partkey#X] -(55) Filter +(57) Filter Input [2]: [(0.2 * avg(l_quantity))#X, l_partkey#X] Condition : isnotnull((0.2 * avg(l_quantity))#X) -(56) ShuffledHashJoin +(58) Sort +Input [2]: [(0.2 * avg(l_quantity))#X, l_partkey#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(59) SortMergeJoin Left keys [1]: [p_partkey#X] Right keys [1]: [l_partkey#X] Join condition: (cast(l_quantity#X as decimal(18,7)) < (0.2 * avg(l_quantity))#X) -(57) Project +(60) Project Output [1]: [l_extendedprice#X] Input [5]: [l_quantity#X, l_extendedprice#X, p_partkey#X, (0.2 * avg(l_quantity))#X, l_partkey#X] -(58) HashAggregate +(61) HashAggregate Input [1]: [l_extendedprice#X] Keys: [] Functions [1]: [partial_sum(l_extendedprice#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [2]: [sum#X, isEmpty#X] -(59) HashAggregate +(62) HashAggregate Input [2]: [sum#X, isEmpty#X] Keys: [] Functions [1]: [sum(l_extendedprice#X)] Aggregate Attributes [1]: [sum(l_extendedprice#X)#X] Results [1]: [CheckOverflow((promote_precision(sum(l_extendedprice#X)#X) / 7.00), DecimalType(27,6)) AS avg_yearly#X] -(60) AdaptiveSparkPlan +(63) AdaptiveSparkPlan Output [1]: [avg_yearly#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/18.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/18.txt index a0e052432bb3..7fe13a003017 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/18.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/18.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (103) +AdaptiveSparkPlan (110) +- == Final Plan == VeloxColumnarToRowExec (70) +- TakeOrderedAndProjectExecTransformer (69) @@ -58,38 +58,45 @@ AdaptiveSparkPlan (103) +- ShuffleQueryStage (57), Statistics(X) +- ReusedExchange (56) +- == Initial Plan == - TakeOrderedAndProject (102) - +- HashAggregate (101) - +- HashAggregate (100) - +- Project (99) - +- ShuffledHashJoin Inner BuildRight (98) - :- Exchange (87) - : +- Project (86) - : +- ShuffledHashJoin Inner BuildLeft (85) - : :- Exchange (73) - : : +- Filter (72) - : : +- Scan parquet (71) - : +- Exchange (84) - : +- ShuffledHashJoin LeftSemi BuildRight (83) - : :- Exchange (76) - : : +- Filter (75) - : : +- Scan parquet (74) - : +- Project (82) - : +- Filter (81) - : +- HashAggregate (80) - : +- Exchange (79) - : +- HashAggregate (78) - : +- Scan parquet (77) - +- ShuffledHashJoin LeftSemi BuildRight (97) - :- Exchange (90) - : +- Filter (89) - : +- Scan parquet (88) - +- Project (96) - +- Filter (95) - +- HashAggregate (94) - +- Exchange (93) - +- HashAggregate (92) - +- Scan parquet (91) + TakeOrderedAndProject (109) + +- HashAggregate (108) + +- HashAggregate (107) + +- Project (106) + +- SortMergeJoin Inner (105) + :- Sort (92) + : +- Exchange (91) + : +- Project (90) + : +- SortMergeJoin Inner (89) + : :- Sort (74) + : : +- Exchange (73) + : : +- Filter (72) + : : +- Scan parquet (71) + : +- Sort (88) + : +- Exchange (87) + : +- SortMergeJoin LeftSemi (86) + : :- Sort (78) + : : +- Exchange (77) + : : +- Filter (76) + : : +- Scan parquet (75) + : +- Sort (85) + : +- Project (84) + : +- Filter (83) + : +- HashAggregate (82) + : +- Exchange (81) + : +- HashAggregate (80) + : +- Scan parquet (79) + +- SortMergeJoin LeftSemi (104) + :- Sort (96) + : +- Exchange (95) + : +- Filter (94) + : +- Scan parquet (93) + +- Sort (103) + +- Project (102) + +- Filter (101) + +- HashAggregate (100) + +- Exchange (99) + +- HashAggregate (98) + +- Scan parquet (97) (1) Scan parquet @@ -401,154 +408,182 @@ Condition : isnotnull(c_custkey#X) Input [2]: [c_custkey#X, c_name#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(74) Scan parquet +(74) Sort +Input [2]: [c_custkey#X, c_name#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(75) Scan parquet Output [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_custkey), IsNotNull(o_orderkey)] ReadSchema: struct -(75) Filter +(76) Filter Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Condition : (isnotnull(o_custkey#X) AND isnotnull(o_orderkey#X)) -(76) Exchange +(77) Exchange Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(77) Scan parquet +(78) Sort +Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(79) Scan parquet Output [2]: [l_orderkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(78) HashAggregate +(80) HashAggregate Input [2]: [l_orderkey#X, l_quantity#X] Keys [1]: [l_orderkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_orderkey#X, sum#X, isEmpty#X] -(79) Exchange +(81) Exchange Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(80) HashAggregate +(82) HashAggregate Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Keys [1]: [l_orderkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [2]: [l_orderkey#X, sum(l_quantity#X)#X AS sum(l_quantity#X)#X] -(81) Filter +(83) Filter Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] Condition : (isnotnull(sum(l_quantity#X)#X) AND (sum(l_quantity#X)#X > 300.00)) -(82) Project +(84) Project Output [1]: [l_orderkey#X] Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] -(83) ShuffledHashJoin +(85) Sort +Input [1]: [l_orderkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(86) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(84) Exchange +(87) Exchange Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(85) ShuffledHashJoin +(88) Sort +Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(89) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join condition: None -(86) Project +(90) Project Output [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] Input [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] -(87) Exchange +(91) Exchange Input [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(88) Scan parquet +(92) Sort +Input [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(93) Scan parquet Output [2]: [l_orderkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_orderkey)] ReadSchema: struct -(89) Filter +(94) Filter Input [2]: [l_orderkey#X, l_quantity#X] Condition : isnotnull(l_orderkey#X) -(90) Exchange +(95) Exchange Input [2]: [l_orderkey#X, l_quantity#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(91) Scan parquet +(96) Sort +Input [2]: [l_orderkey#X, l_quantity#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(97) Scan parquet Output [2]: [l_orderkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(92) HashAggregate +(98) HashAggregate Input [2]: [l_orderkey#X, l_quantity#X] Keys [1]: [l_orderkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_orderkey#X, sum#X, isEmpty#X] -(93) Exchange +(99) Exchange Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(94) HashAggregate +(100) HashAggregate Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Keys [1]: [l_orderkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [2]: [l_orderkey#X, sum(l_quantity#X)#X AS sum(l_quantity#X)#X] -(95) Filter +(101) Filter Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] Condition : (isnotnull(sum(l_quantity#X)#X) AND (sum(l_quantity#X)#X > 300.00)) -(96) Project +(102) Project Output [1]: [l_orderkey#X] Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] -(97) ShuffledHashJoin +(103) Sort +Input [1]: [l_orderkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(104) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(98) ShuffledHashJoin +(105) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(99) Project +(106) Project Output [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_quantity#X] Input [7]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_orderkey#X, l_quantity#X] -(100) HashAggregate +(107) HashAggregate Input [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_quantity#X] Keys [5]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [7]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] -(101) HashAggregate +(108) HashAggregate Input [7]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] Keys [5]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity#X)#X AS sum(l_quantity)#X] -(102) TakeOrderedAndProject +(109) TakeOrderedAndProject Input [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] Arguments: X, [o_totalprice#X DESC NULLS LAST, o_orderdate#X ASC NULLS FIRST], [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] -(103) AdaptiveSparkPlan +(110) AdaptiveSparkPlan Output [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/19.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/19.txt index 440383aa1cd7..34abb726b85a 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/19.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/19.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (36) +AdaptiveSparkPlan (38) +- == Final Plan == VeloxColumnarToRowExec (24) +- ^ RegularHashAggregateExecTransformer (22) @@ -21,17 +21,19 @@ AdaptiveSparkPlan (36) +- ^ NoopFilter (11) +- ^ Scan parquet (10) +- == Initial Plan == - HashAggregate (35) - +- HashAggregate (34) - +- Project (33) - +- ShuffledHashJoin Inner BuildRight (32) - :- Exchange (28) - : +- Project (27) - : +- Filter (26) - : +- Scan parquet (25) - +- Exchange (31) - +- Filter (30) - +- Scan parquet (29) + HashAggregate (37) + +- HashAggregate (36) + +- Project (35) + +- SortMergeJoin Inner (34) + :- Sort (29) + : +- Exchange (28) + : +- Project (27) + : +- Filter (26) + : +- Scan parquet (25) + +- Sort (33) + +- Exchange (32) + +- Filter (31) + +- Scan parquet (30) (1) Scan parquet @@ -157,44 +159,52 @@ Input [6]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, l_shipin Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(29) Scan parquet +(29) Sort +Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(30) Scan parquet Output [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_size), GreaterThanOrEqual(p_size,1), IsNotNull(p_partkey), Or(Or(And(And(EqualTo(p_brand,Brand#X),In(p_container, [SM BOX,SM CASE,SM PACK,SM PKG])),LessThanOrEqual(p_size,5)),And(And(EqualTo(p_brand,Brand#X),In(p_container, [MED BAG,MED BOX,MED PACK,MED PKG])),LessThanOrEqual(p_size,10))),And(And(EqualTo(p_brand,Brand#X),In(p_container, [LG BOX,LG CASE,LG PACK,LG PKG])),LessThanOrEqual(p_size,15)))] ReadSchema: struct -(30) Filter +(31) Filter Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Condition : (((isnotnull(p_size#X) AND (p_size#X >= 1)) AND isnotnull(p_partkey#X)) AND (((((p_brand#X = Brand#X) AND p_container#X IN (SM CASE,SM BOX,SM PACK,SM PKG)) AND (p_size#X <= 5)) OR (((p_brand#X = Brand#X) AND p_container#X IN (MED BAG,MED BOX,MED PKG,MED PACK)) AND (p_size#X <= 10))) OR (((p_brand#X = Brand#X) AND p_container#X IN (LG CASE,LG BOX,LG PACK,LG PKG)) AND (p_size#X <= 15)))) -(31) Exchange +(32) Exchange Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(32) ShuffledHashJoin +(33) Sort +Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(34) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join condition: (((((((p_brand#X = Brand#X) AND p_container#X IN (SM CASE,SM BOX,SM PACK,SM PKG)) AND (l_quantity#X >= 1.00)) AND (l_quantity#X <= 11.00)) AND (p_size#X <= 5)) OR (((((p_brand#X = Brand#X) AND p_container#X IN (MED BAG,MED BOX,MED PKG,MED PACK)) AND (l_quantity#X >= 10.00)) AND (l_quantity#X <= 20.00)) AND (p_size#X <= 10))) OR (((((p_brand#X = Brand#X) AND p_container#X IN (LG CASE,LG BOX,LG PACK,LG PKG)) AND (l_quantity#X >= 20.00)) AND (l_quantity#X <= 30.00)) AND (p_size#X <= 15))) -(33) Project +(35) Project Output [2]: [l_extendedprice#X, l_discount#X] Input [8]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, p_partkey#X, p_brand#X, p_size#X, p_container#X] -(34) HashAggregate +(36) HashAggregate Input [2]: [l_extendedprice#X, l_discount#X] Keys: [] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [2]: [sum#X, isEmpty#X] -(35) HashAggregate +(37) HashAggregate Input [2]: [sum#X, isEmpty#X] Keys: [] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] Results [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X AS revenue#X] -(36) AdaptiveSparkPlan +(38) AdaptiveSparkPlan Output [1]: [revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/20.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/20.txt index d3cff30e0ce2..2cf50b2a3a98 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/20.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/20.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (133) +AdaptiveSparkPlan (143) +- == Final Plan == VeloxColumnarToRowExec (93) +- AQEShuffleRead (92) @@ -75,45 +75,55 @@ AdaptiveSparkPlan (133) +- ^ NoopFilter (78) +- ^ Scan parquet (77) +- == Initial Plan == - Sort (132) - +- Exchange (131) - +- Project (130) - +- ShuffledHashJoin Inner BuildRight (129) - :- Exchange (124) - : +- Project (123) - : +- ShuffledHashJoin LeftSemi BuildRight (122) - : :- Exchange (96) - : : +- Filter (95) - : : +- Scan parquet (94) - : +- Exchange (121) - : +- Project (120) - : +- ShuffledHashJoin Inner BuildLeft (119) - : :- Exchange (105) - : : +- ShuffledHashJoin LeftSemi BuildRight (104) - : : :- Exchange (99) - : : : +- Filter (98) - : : : +- Scan parquet (97) - : : +- Exchange (103) - : : +- Project (102) - : : +- Filter (101) - : : +- Scan parquet (100) - : +- Exchange (118) - : +- Filter (117) - : +- HashAggregate (116) - : +- HashAggregate (115) - : +- ShuffledHashJoin LeftSemi BuildRight (114) - : :- Exchange (109) - : : +- Project (108) - : : +- Filter (107) - : : +- Scan parquet (106) - : +- Exchange (113) - : +- Project (112) - : +- Filter (111) - : +- Scan parquet (110) - +- Exchange (128) - +- Project (127) - +- Filter (126) - +- Scan parquet (125) + Sort (142) + +- Exchange (141) + +- Project (140) + +- SortMergeJoin Inner (139) + :- Sort (133) + : +- Exchange (132) + : +- Project (131) + : +- SortMergeJoin LeftSemi (130) + : :- Sort (97) + : : +- Exchange (96) + : : +- Filter (95) + : : +- Scan parquet (94) + : +- Sort (129) + : +- Exchange (128) + : +- Project (127) + : +- SortMergeJoin Inner (126) + : :- Sort (109) + : : +- Exchange (108) + : : +- SortMergeJoin LeftSemi (107) + : : :- Sort (101) + : : : +- Exchange (100) + : : : +- Filter (99) + : : : +- Scan parquet (98) + : : +- Sort (106) + : : +- Exchange (105) + : : +- Project (104) + : : +- Filter (103) + : : +- Scan parquet (102) + : +- Sort (125) + : +- Exchange (124) + : +- Filter (123) + : +- HashAggregate (122) + : +- HashAggregate (121) + : +- SortMergeJoin LeftSemi (120) + : :- Sort (114) + : : +- Exchange (113) + : : +- Project (112) + : : +- Filter (111) + : : +- Scan parquet (110) + : +- Sort (119) + : +- Exchange (118) + : +- Project (117) + : +- Filter (116) + : +- Scan parquet (115) + +- Sort (138) + +- Exchange (137) + +- Project (136) + +- Filter (135) + +- Scan parquet (134) (1) Scan parquet @@ -507,176 +517,216 @@ Condition : isnotnull(s_nationkey#X) Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(97) Scan parquet +(97) Sort +Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(98) Scan parquet Output [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(ps_availqty), IsNotNull(ps_partkey), IsNotNull(ps_suppkey)] ReadSchema: struct -(98) Filter +(99) Filter Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Condition : ((isnotnull(ps_availqty#X) AND isnotnull(ps_partkey#X)) AND isnotnull(ps_suppkey#X)) -(99) Exchange +(100) Exchange Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Arguments: hashpartitioning(ps_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(100) Scan parquet +(101) Sort +Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] +Arguments: [ps_partkey#X ASC NULLS FIRST], false, 0 + +(102) Scan parquet Output [2]: [p_partkey#X, p_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_name), StringStartsWith(p_name,forest)] ReadSchema: struct -(101) Filter +(103) Filter Input [2]: [p_partkey#X, p_name#X] Condition : (isnotnull(p_name#X) AND StartsWith(p_name#X, forest)) -(102) Project +(104) Project Output [1]: [p_partkey#X] Input [2]: [p_partkey#X, p_name#X] -(103) Exchange +(105) Exchange Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(104) ShuffledHashJoin +(106) Sort +Input [1]: [p_partkey#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(107) SortMergeJoin Left keys [1]: [ps_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(105) Exchange +(108) Exchange Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Arguments: hashpartitioning(ps_partkey#X, ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(106) Scan parquet +(109) Sort +Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] +Arguments: [ps_partkey#X ASC NULLS FIRST, ps_suppkey#X ASC NULLS FIRST], false, 0 + +(110) Scan parquet Output [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1994-01-01), LessThan(l_shipdate,1995-01-01), IsNotNull(l_partkey), IsNotNull(l_suppkey)] ReadSchema: struct -(107) Filter +(111) Filter Input [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] Condition : ((((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1994-01-01)) AND (l_shipdate#X < 1995-01-01)) AND isnotnull(l_partkey#X)) AND isnotnull(l_suppkey#X)) -(108) Project +(112) Project Output [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Input [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] -(109) Exchange +(113) Exchange Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(110) Scan parquet +(114) Sort +Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(115) Scan parquet Output [2]: [p_partkey#X, p_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_name), StringStartsWith(p_name,forest)] ReadSchema: struct -(111) Filter +(116) Filter Input [2]: [p_partkey#X, p_name#X] Condition : (isnotnull(p_name#X) AND StartsWith(p_name#X, forest)) -(112) Project +(117) Project Output [1]: [p_partkey#X] Input [2]: [p_partkey#X, p_name#X] -(113) Exchange +(118) Exchange Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(114) ShuffledHashJoin +(119) Sort +Input [1]: [p_partkey#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(120) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(115) HashAggregate +(121) HashAggregate Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Keys [2]: [l_partkey#X, l_suppkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] -(116) HashAggregate +(122) HashAggregate Input [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] Keys [2]: [l_partkey#X, l_suppkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [3]: [CheckOverflow((0.50 * promote_precision(sum(l_quantity#X)#X)), DecimalType(24,3)) AS (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(117) Filter +(123) Filter Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Condition : isnotnull((0.5 * sum(l_quantity))#X) -(118) Exchange +(124) Exchange Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Arguments: hashpartitioning(l_partkey#X, l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(119) ShuffledHashJoin +(125) Sort +Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] +Arguments: [l_partkey#X ASC NULLS FIRST, l_suppkey#X ASC NULLS FIRST], false, 0 + +(126) SortMergeJoin Left keys [2]: [ps_partkey#X, ps_suppkey#X] Right keys [2]: [l_partkey#X, l_suppkey#X] Join condition: (cast(ps_availqty#X as decimal(24,3)) > (0.5 * sum(l_quantity))#X) -(120) Project +(127) Project Output [1]: [ps_suppkey#X] Input [6]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(121) Exchange +(128) Exchange Input [1]: [ps_suppkey#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(122) ShuffledHashJoin +(129) Sort +Input [1]: [ps_suppkey#X] +Arguments: [ps_suppkey#X ASC NULLS FIRST], false, 0 + +(130) SortMergeJoin Left keys [1]: [s_suppkey#X] Right keys [1]: [ps_suppkey#X] Join condition: None -(123) Project +(131) Project Output [3]: [s_name#X, s_address#X, s_nationkey#X] Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] -(124) Exchange +(132) Exchange Input [3]: [s_name#X, s_address#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(125) Scan parquet +(133) Sort +Input [3]: [s_name#X, s_address#X, s_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(134) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,CANADA), IsNotNull(n_nationkey)] ReadSchema: struct -(126) Filter +(135) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = CANADA)) AND isnotnull(n_nationkey#X)) -(127) Project +(136) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(128) Exchange +(137) Exchange Input [1]: [n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(129) ShuffledHashJoin +(138) Sort +Input [1]: [n_nationkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(139) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(130) Project +(140) Project Output [2]: [s_name#X, s_address#X] Input [4]: [s_name#X, s_address#X, s_nationkey#X, n_nationkey#X] -(131) Exchange +(141) Exchange Input [2]: [s_name#X, s_address#X] Arguments: rangepartitioning(s_name#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(132) Sort +(142) Sort Input [2]: [s_name#X, s_address#X] Arguments: [s_name#X ASC NULLS FIRST], true, 0 -(133) AdaptiveSparkPlan +(143) AdaptiveSparkPlan Output [2]: [s_name#X, s_address#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/21.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/21.txt index bd77a7f7f043..f92684b17b15 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/21.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/21.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (128) +AdaptiveSparkPlan (137) +- == Final Plan == VeloxColumnarToRowExec (91) +- ^ RegularHashAggregateExecTransformer (89) @@ -72,42 +72,51 @@ AdaptiveSparkPlan (128) +- ^ NoopFilter (71) +- ^ Scan parquet (70) +- == Initial Plan == - TakeOrderedAndProject (127) - +- HashAggregate (126) - +- Exchange (125) - +- HashAggregate (124) - +- Project (123) - +- ShuffledHashJoin Inner BuildRight (122) - :- Exchange (117) - : +- Project (116) - : +- ShuffledHashJoin Inner BuildRight (115) - : :- Exchange (110) - : : +- Project (109) - : : +- ShuffledHashJoin Inner BuildLeft (108) - : : :- Exchange (94) - : : : +- Filter (93) - : : : +- Scan parquet (92) - : : +- Exchange (107) - : : +- ShuffledHashJoin LeftAnti BuildRight (106) - : : :- ShuffledHashJoin LeftSemi BuildRight (101) - : : : :- Exchange (98) - : : : : +- Project (97) - : : : : +- Filter (96) - : : : : +- Scan parquet (95) - : : : +- Exchange (100) - : : : +- Scan parquet (99) - : : +- Exchange (105) - : : +- Project (104) - : : +- Filter (103) - : : +- Scan parquet (102) - : +- Exchange (114) - : +- Project (113) - : +- Filter (112) - : +- Scan parquet (111) - +- Exchange (121) - +- Project (120) - +- Filter (119) - +- Scan parquet (118) + TakeOrderedAndProject (136) + +- HashAggregate (135) + +- Exchange (134) + +- HashAggregate (133) + +- Project (132) + +- SortMergeJoin Inner (131) + :- Sort (125) + : +- Exchange (124) + : +- Project (123) + : +- SortMergeJoin Inner (122) + : :- Sort (116) + : : +- Exchange (115) + : : +- Project (114) + : : +- SortMergeJoin Inner (113) + : : :- Sort (95) + : : : +- Exchange (94) + : : : +- Filter (93) + : : : +- Scan parquet (92) + : : +- Sort (112) + : : +- Exchange (111) + : : +- SortMergeJoin LeftAnti (110) + : : :- SortMergeJoin LeftSemi (104) + : : : :- Sort (100) + : : : : +- Exchange (99) + : : : : +- Project (98) + : : : : +- Filter (97) + : : : : +- Scan parquet (96) + : : : +- Sort (103) + : : : +- Exchange (102) + : : : +- Scan parquet (101) + : : +- Sort (109) + : : +- Exchange (108) + : : +- Project (107) + : : +- Filter (106) + : : +- Scan parquet (105) + : +- Sort (121) + : +- Exchange (120) + : +- Project (119) + : +- Filter (118) + : +- Scan parquet (117) + +- Sort (130) + +- Exchange (129) + +- Project (128) + +- Filter (127) + +- Scan parquet (126) (1) Scan parquet @@ -496,163 +505,199 @@ Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) Input [3]: [s_suppkey#X, s_name#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(95) Scan parquet +(95) Sort +Input [3]: [s_suppkey#X, s_name#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(96) Scan parquet Output [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_receiptdate), IsNotNull(l_commitdate), IsNotNull(l_suppkey), IsNotNull(l_orderkey)] ReadSchema: struct -(96) Filter +(97) Filter Input [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] Condition : ((((isnotnull(l_receiptdate#X) AND isnotnull(l_commitdate#X)) AND (l_receiptdate#X > l_commitdate#X)) AND isnotnull(l_suppkey#X)) AND isnotnull(l_orderkey#X)) -(97) Project +(98) Project Output [2]: [l_orderkey#X, l_suppkey#X] Input [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] -(98) Exchange +(99) Exchange Input [2]: [l_orderkey#X, l_suppkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(99) Scan parquet +(100) Sort +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(101) Scan parquet Output [2]: [l_orderkey#X, l_suppkey#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(100) Exchange +(102) Exchange Input [2]: [l_orderkey#X, l_suppkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(101) ShuffledHashJoin +(103) Sort +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(104) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: NOT (l_suppkey#X = l_suppkey#X) -(102) Scan parquet +(105) Scan parquet Output [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_receiptdate), IsNotNull(l_commitdate)] ReadSchema: struct -(103) Filter +(106) Filter Input [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] Condition : ((isnotnull(l_receiptdate#X) AND isnotnull(l_commitdate#X)) AND (l_receiptdate#X > l_commitdate#X)) -(104) Project +(107) Project Output [2]: [l_orderkey#X, l_suppkey#X] Input [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] -(105) Exchange +(108) Exchange Input [2]: [l_orderkey#X, l_suppkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(106) ShuffledHashJoin +(109) Sort +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(110) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: NOT (l_suppkey#X = l_suppkey#X) -(107) Exchange +(111) Exchange Input [2]: [l_orderkey#X, l_suppkey#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(108) ShuffledHashJoin +(112) Sort +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: [l_suppkey#X ASC NULLS FIRST], false, 0 + +(113) SortMergeJoin Left keys [1]: [s_suppkey#X] Right keys [1]: [l_suppkey#X] Join condition: None -(109) Project +(114) Project Output [3]: [s_name#X, s_nationkey#X, l_orderkey#X] Input [5]: [s_suppkey#X, s_name#X, s_nationkey#X, l_orderkey#X, l_suppkey#X] -(110) Exchange +(115) Exchange Input [3]: [s_name#X, s_nationkey#X, l_orderkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(111) Scan parquet +(116) Sort +Input [3]: [s_name#X, s_nationkey#X, l_orderkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(117) Scan parquet Output [2]: [o_orderkey#X, o_orderstatus#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderstatus), EqualTo(o_orderstatus,F), IsNotNull(o_orderkey)] ReadSchema: struct -(112) Filter +(118) Filter Input [2]: [o_orderkey#X, o_orderstatus#X] Condition : ((isnotnull(o_orderstatus#X) AND (o_orderstatus#X = F)) AND isnotnull(o_orderkey#X)) -(113) Project +(119) Project Output [1]: [o_orderkey#X] Input [2]: [o_orderkey#X, o_orderstatus#X] -(114) Exchange +(120) Exchange Input [1]: [o_orderkey#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(115) ShuffledHashJoin +(121) Sort +Input [1]: [o_orderkey#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(122) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [o_orderkey#X] Join condition: None -(116) Project +(123) Project Output [2]: [s_name#X, s_nationkey#X] Input [4]: [s_name#X, s_nationkey#X, l_orderkey#X, o_orderkey#X] -(117) Exchange +(124) Exchange Input [2]: [s_name#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(118) Scan parquet +(125) Sort +Input [2]: [s_name#X, s_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(126) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,SAUDI ARABIA), IsNotNull(n_nationkey)] ReadSchema: struct -(119) Filter +(127) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = SAUDI ARABIA)) AND isnotnull(n_nationkey#X)) -(120) Project +(128) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(121) Exchange +(129) Exchange Input [1]: [n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(122) ShuffledHashJoin +(130) Sort +Input [1]: [n_nationkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(131) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(123) Project +(132) Project Output [1]: [s_name#X] Input [3]: [s_name#X, s_nationkey#X, n_nationkey#X] -(124) HashAggregate +(133) HashAggregate Input [1]: [s_name#X] Keys [1]: [s_name#X] Functions [1]: [partial_count(1)] Aggregate Attributes [1]: [count#X] Results [2]: [s_name#X, count#X] -(125) Exchange +(134) Exchange Input [2]: [s_name#X, count#X] Arguments: hashpartitioning(s_name#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(126) HashAggregate +(135) HashAggregate Input [2]: [s_name#X, count#X] Keys [1]: [s_name#X] Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#X] Results [2]: [s_name#X, count(1)#X AS numwait#X] -(127) TakeOrderedAndProject +(136) TakeOrderedAndProject Input [2]: [s_name#X, numwait#X] Arguments: X, [numwait#X DESC NULLS LAST, s_name#X ASC NULLS FIRST], [s_name#X, numwait#X] -(128) AdaptiveSparkPlan +(137) AdaptiveSparkPlan Output [2]: [s_name#X, numwait#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/22.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/22.txt index 7cf55b4c0f2d..1c2790a4a999 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/22.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/22.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (50) +AdaptiveSparkPlan (52) +- == Final Plan == VeloxColumnarToRowExec (37) +- ^ SortExecTransformer (35) @@ -30,18 +30,20 @@ AdaptiveSparkPlan (50) +- ^ ProjectExecTransformer (11) +- ^ Scan parquet (10) +- == Initial Plan == - Sort (49) - +- Exchange (48) - +- HashAggregate (47) - +- Exchange (46) - +- HashAggregate (45) - +- Project (44) - +- ShuffledHashJoin LeftAnti BuildRight (43) - :- Exchange (40) - : +- Filter (39) - : +- Scan parquet (38) - +- Exchange (42) - +- Scan parquet (41) + Sort (51) + +- Exchange (50) + +- HashAggregate (49) + +- Exchange (48) + +- HashAggregate (47) + +- Project (46) + +- SortMergeJoin LeftAnti (45) + :- Sort (41) + : +- Exchange (40) + : +- Filter (39) + : +- Scan parquet (38) + +- Sort (44) + +- Exchange (43) + +- Scan parquet (42) (1) Scan parquet @@ -210,170 +212,178 @@ Condition : ((isnotnull(c_acctbal#X) AND substring(c_phone#X, 1, 2) IN (13,31,23 Input [3]: [c_custkey#X, c_phone#X, c_acctbal#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(41) Scan parquet +(41) Sort +Input [3]: [c_custkey#X, c_phone#X, c_acctbal#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(42) Scan parquet Output [1]: [o_custkey#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(42) Exchange +(43) Exchange Input [1]: [o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(43) ShuffledHashJoin +(44) Sort +Input [1]: [o_custkey#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(45) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join condition: None -(44) Project +(46) Project Output [2]: [substring(c_phone#X, 1, 2) AS cntrycode#X, c_acctbal#X] Input [3]: [c_custkey#X, c_phone#X, c_acctbal#X] -(45) HashAggregate +(47) HashAggregate Input [2]: [cntrycode#X, c_acctbal#X] Keys [1]: [cntrycode#X] Functions [2]: [partial_count(1), partial_sum(c_acctbal#X)] Aggregate Attributes [3]: [count#X, sum#X, isEmpty#X] Results [4]: [cntrycode#X, count#X, sum#X, isEmpty#X] -(46) Exchange +(48) Exchange Input [4]: [cntrycode#X, count#X, sum#X, isEmpty#X] Arguments: hashpartitioning(cntrycode#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(47) HashAggregate +(49) HashAggregate Input [4]: [cntrycode#X, count#X, sum#X, isEmpty#X] Keys [1]: [cntrycode#X] Functions [2]: [count(1), sum(c_acctbal#X)] Aggregate Attributes [2]: [count(1)#X, sum(c_acctbal#X)#X] Results [3]: [cntrycode#X, count(1)#X AS numcust#X, sum(c_acctbal#X)#X AS totacctbal#X] -(48) Exchange +(50) Exchange Input [3]: [cntrycode#X, numcust#X, totacctbal#X] Arguments: rangepartitioning(cntrycode#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(49) Sort +(51) Sort Input [3]: [cntrycode#X, numcust#X, totacctbal#X] Arguments: [cntrycode#X ASC NULLS FIRST], true, 0 -(50) AdaptiveSparkPlan +(52) AdaptiveSparkPlan Output [3]: [cntrycode#X, numcust#X, totacctbal#X] Arguments: isFinalPlan=true ===== Subqueries ===== Subquery:1 Hosting operator id = 1 Hosting Expression = Subquery subquery#X, [id=#X] -AdaptiveSparkPlan (70) +AdaptiveSparkPlan (72) +- == Final Plan == - VeloxColumnarToRowExec (63) - +- ^ RegularHashAggregateExecTransformer (61) - +- ^ InputIteratorTransformer (60) - +- ShuffleQueryStage (58), Statistics(X) - +- ColumnarExchange (57) - +- VeloxAppendBatches (56) - +- ^ FlushableHashAggregateExecTransformer (54) - +- ^ ProjectExecTransformer (53) - +- ^ NoopFilter (52) - +- ^ Scan parquet (51) + VeloxColumnarToRowExec (65) + +- ^ RegularHashAggregateExecTransformer (63) + +- ^ InputIteratorTransformer (62) + +- ShuffleQueryStage (60), Statistics(X) + +- ColumnarExchange (59) + +- VeloxAppendBatches (58) + +- ^ FlushableHashAggregateExecTransformer (56) + +- ^ ProjectExecTransformer (55) + +- ^ NoopFilter (54) + +- ^ Scan parquet (53) +- == Initial Plan == - HashAggregate (69) - +- Exchange (68) - +- HashAggregate (67) - +- Project (66) - +- Filter (65) - +- Scan parquet (64) + HashAggregate (71) + +- Exchange (70) + +- HashAggregate (69) + +- Project (68) + +- Filter (67) + +- Scan parquet (66) -(51) Scan parquet +(53) Scan parquet Output [2]: [c_phone#X, c_acctbal#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(c_acctbal), GreaterThan(c_acctbal,0.00)] ReadSchema: struct -(52) NoopFilter +(54) NoopFilter Input [2]: [c_phone#X, c_acctbal#X] Arguments: [c_phone#X, c_acctbal#X] -(53) ProjectExecTransformer +(55) ProjectExecTransformer Output [1]: [c_acctbal#X] Input [2]: [c_phone#X, c_acctbal#X] -(54) FlushableHashAggregateExecTransformer +(56) FlushableHashAggregateExecTransformer Input [1]: [c_acctbal#X] Keys: [] Functions [1]: [partial_avg(c_acctbal#X)] Aggregate Attributes [2]: [sum#X, count#X] Results [2]: [sum#X, count#X] -(55) WholeStageCodegenTransformer (X) +(57) WholeStageCodegenTransformer (X) Input [2]: [sum#X, count#X] Arguments: false -(56) VeloxAppendBatches +(58) VeloxAppendBatches Input [2]: [sum#X, count#X] Arguments: X -(57) ColumnarExchange +(59) ColumnarExchange Input [2]: [sum#X, count#X] Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=X], [id=#X] -(58) ShuffleQueryStage +(60) ShuffleQueryStage Output [2]: [sum#X, count#X] Arguments: X -(59) InputAdapter +(61) InputAdapter Input [2]: [sum#X, count#X] -(60) InputIteratorTransformer +(62) InputIteratorTransformer Input [2]: [sum#X, count#X] -(61) RegularHashAggregateExecTransformer +(63) RegularHashAggregateExecTransformer Input [2]: [sum#X, count#X] Keys: [] Functions [1]: [avg(c_acctbal#X)] Aggregate Attributes [1]: [avg(c_acctbal#X)#X] Results [1]: [avg(c_acctbal#X)#X AS avg(c_acctbal)#X] -(62) WholeStageCodegenTransformer (X) +(64) WholeStageCodegenTransformer (X) Input [1]: [avg(c_acctbal)#X] Arguments: false -(63) VeloxColumnarToRowExec +(65) VeloxColumnarToRowExec Input [1]: [avg(c_acctbal)#X] -(64) Scan parquet +(66) Scan parquet Output [2]: [c_phone#X, c_acctbal#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(c_acctbal), GreaterThan(c_acctbal,0.00)] ReadSchema: struct -(65) Filter +(67) Filter Input [2]: [c_phone#X, c_acctbal#X] Condition : ((isnotnull(c_acctbal#X) AND (c_acctbal#X > 0.00)) AND substring(c_phone#X, 1, 2) IN (13,31,23,29,30,18,17)) -(66) Project +(68) Project Output [1]: [c_acctbal#X] Input [2]: [c_phone#X, c_acctbal#X] -(67) HashAggregate +(69) HashAggregate Input [1]: [c_acctbal#X] Keys: [] Functions [1]: [partial_avg(c_acctbal#X)] Aggregate Attributes [2]: [sum#X, count#X] Results [2]: [sum#X, count#X] -(68) Exchange +(70) Exchange Input [2]: [sum#X, count#X] Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=X] -(69) HashAggregate +(71) HashAggregate Input [2]: [sum#X, count#X] Keys: [] Functions [1]: [avg(c_acctbal#X)] Aggregate Attributes [1]: [avg(c_acctbal#X)#X] Results [1]: [avg(c_acctbal#X)#X AS avg(c_acctbal)#X] -(70) AdaptiveSparkPlan +(72) AdaptiveSparkPlan Output [1]: [avg(c_acctbal)#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/3.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/3.txt index 8b1f048c7d6e..1f9905294144 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/3.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/3.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (63) +AdaptiveSparkPlan (67) +- == Final Plan == VeloxColumnarToRowExec (43) +- TakeOrderedAndProjectExecTransformer (42) @@ -36,25 +36,29 @@ AdaptiveSparkPlan (63) +- ^ NoopFilter (28) +- ^ Scan parquet (27) +- == Initial Plan == - TakeOrderedAndProject (62) - +- HashAggregate (61) - +- HashAggregate (60) - +- Project (59) - +- ShuffledHashJoin Inner BuildRight (58) - :- Exchange (53) - : +- Project (52) - : +- ShuffledHashJoin Inner BuildLeft (51) - : :- Exchange (47) - : : +- Project (46) - : : +- Filter (45) - : : +- Scan parquet (44) - : +- Exchange (50) - : +- Filter (49) - : +- Scan parquet (48) - +- Exchange (57) - +- Project (56) - +- Filter (55) - +- Scan parquet (54) + TakeOrderedAndProject (66) + +- HashAggregate (65) + +- HashAggregate (64) + +- Project (63) + +- SortMergeJoin Inner (62) + :- Sort (56) + : +- Exchange (55) + : +- Project (54) + : +- SortMergeJoin Inner (53) + : :- Sort (48) + : : +- Exchange (47) + : : +- Project (46) + : : +- Filter (45) + : : +- Scan parquet (44) + : +- Sort (52) + : +- Exchange (51) + : +- Filter (50) + : +- Scan parquet (49) + +- Sort (61) + +- Exchange (60) + +- Project (59) + +- Filter (58) + +- Scan parquet (57) (1) Scan parquet @@ -256,80 +260,96 @@ Input [2]: [c_custkey#X, c_mktsegment#X] Input [1]: [c_custkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(48) Scan parquet +(48) Sort +Input [1]: [c_custkey#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(49) Scan parquet Output [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderdate), LessThan(o_orderdate,1995-03-15), IsNotNull(o_custkey), IsNotNull(o_orderkey)] ReadSchema: struct -(49) Filter +(50) Filter Input [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Condition : (((isnotnull(o_orderdate#X) AND (o_orderdate#X < 1995-03-15)) AND isnotnull(o_custkey#X)) AND isnotnull(o_orderkey#X)) -(50) Exchange +(51) Exchange Input [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(51) ShuffledHashJoin +(52) Sort +Input [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(53) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join condition: None -(52) Project +(54) Project Output [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] Input [5]: [c_custkey#X, o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] -(53) Exchange +(55) Exchange Input [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(54) Scan parquet +(56) Sort +Input [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(57) Scan parquet Output [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThan(l_shipdate,1995-03-15), IsNotNull(l_orderkey)] ReadSchema: struct -(55) Filter +(58) Filter Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : ((isnotnull(l_shipdate#X) AND (l_shipdate#X > 1995-03-15)) AND isnotnull(l_orderkey#X)) -(56) Project +(59) Project Output [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(57) Exchange +(60) Exchange Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(58) ShuffledHashJoin +(61) Sort +Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(62) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(59) Project +(63) Project Output [5]: [o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] Input [6]: [o_orderkey#X, o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] -(60) HashAggregate +(64) HashAggregate Input [5]: [o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] Keys [3]: [l_orderkey#X, o_orderdate#X, o_shippriority#X] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [5]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] -(61) HashAggregate +(65) HashAggregate Input [5]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] Keys [3]: [l_orderkey#X, o_orderdate#X, o_shippriority#X] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] Results [4]: [l_orderkey#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X AS revenue#X, o_orderdate#X, o_shippriority#X] -(62) TakeOrderedAndProject +(66) TakeOrderedAndProject Input [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] Arguments: X, [revenue#X DESC NULLS LAST, o_orderdate#X ASC NULLS FIRST], [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] -(63) AdaptiveSparkPlan +(67) AdaptiveSparkPlan Output [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/4.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/4.txt index 1b680584826d..130bc2983040 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/4.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/4.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (54) +AdaptiveSparkPlan (56) +- == Final Plan == VeloxColumnarToRowExec (38) +- ^ SortExecTransformer (36) @@ -31,21 +31,23 @@ AdaptiveSparkPlan (54) +- ^ NoopFilter (11) +- ^ Scan parquet (10) +- == Initial Plan == - Sort (53) - +- Exchange (52) - +- HashAggregate (51) - +- Exchange (50) - +- HashAggregate (49) - +- Project (48) - +- ShuffledHashJoin LeftSemi BuildRight (47) - :- Exchange (42) - : +- Project (41) - : +- Filter (40) - : +- Scan parquet (39) - +- Exchange (46) - +- Project (45) - +- Filter (44) - +- Scan parquet (43) + Sort (55) + +- Exchange (54) + +- HashAggregate (53) + +- Exchange (52) + +- HashAggregate (51) + +- Project (50) + +- SortMergeJoin LeftSemi (49) + :- Sort (43) + : +- Exchange (42) + : +- Project (41) + : +- Filter (40) + : +- Scan parquet (39) + +- Sort (48) + +- Exchange (47) + +- Project (46) + +- Filter (45) + +- Scan parquet (44) (1) Scan parquet @@ -223,60 +225,68 @@ Input [3]: [o_orderkey#X, o_orderdate#X, o_orderpriority#X] Input [2]: [o_orderkey#X, o_orderpriority#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(43) Scan parquet +(43) Sort +Input [2]: [o_orderkey#X, o_orderpriority#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(44) Scan parquet Output [3]: [l_orderkey#X, l_commitdate#X, l_receiptdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_commitdate), IsNotNull(l_receiptdate)] ReadSchema: struct -(44) Filter +(45) Filter Input [3]: [l_orderkey#X, l_commitdate#X, l_receiptdate#X] Condition : ((isnotnull(l_commitdate#X) AND isnotnull(l_receiptdate#X)) AND (l_commitdate#X < l_receiptdate#X)) -(45) Project +(46) Project Output [1]: [l_orderkey#X] Input [3]: [l_orderkey#X, l_commitdate#X, l_receiptdate#X] -(46) Exchange +(47) Exchange Input [1]: [l_orderkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(47) ShuffledHashJoin +(48) Sort +Input [1]: [l_orderkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(49) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(48) Project +(50) Project Output [1]: [o_orderpriority#X] Input [2]: [o_orderkey#X, o_orderpriority#X] -(49) HashAggregate +(51) HashAggregate Input [1]: [o_orderpriority#X] Keys [1]: [o_orderpriority#X] Functions [1]: [partial_count(1)] Aggregate Attributes [1]: [count#X] Results [2]: [o_orderpriority#X, count#X] -(50) Exchange +(52) Exchange Input [2]: [o_orderpriority#X, count#X] Arguments: hashpartitioning(o_orderpriority#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(51) HashAggregate +(53) HashAggregate Input [2]: [o_orderpriority#X, count#X] Keys [1]: [o_orderpriority#X] Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#X] Results [2]: [o_orderpriority#X, count(1)#X AS order_count#X] -(52) Exchange +(54) Exchange Input [2]: [o_orderpriority#X, order_count#X] Arguments: rangepartitioning(o_orderpriority#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(53) Sort +(55) Sort Input [2]: [o_orderpriority#X, order_count#X] Arguments: [o_orderpriority#X ASC NULLS FIRST], true, 0 -(54) AdaptiveSparkPlan +(56) AdaptiveSparkPlan Output [2]: [o_orderpriority#X, order_count#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/5.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/5.txt index 67159dbb648a..10ce074fd760 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/5.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/5.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (146) +AdaptiveSparkPlan (156) +- == Final Plan == VeloxColumnarToRowExec (106) +- ^ SortExecTransformer (104) @@ -83,45 +83,55 @@ AdaptiveSparkPlan (146) +- ^ NoopFilter (79) +- ^ Scan parquet (78) +- == Initial Plan == - Sort (145) - +- Exchange (144) - +- HashAggregate (143) - +- Exchange (142) - +- HashAggregate (141) - +- Project (140) - +- ShuffledHashJoin Inner BuildRight (139) - :- Exchange (134) - : +- Project (133) - : +- ShuffledHashJoin Inner BuildRight (132) - : :- Exchange (128) - : : +- Project (127) - : : +- ShuffledHashJoin Inner BuildRight (126) - : : :- Exchange (122) - : : : +- Project (121) - : : : +- ShuffledHashJoin Inner BuildRight (120) - : : : :- Exchange (116) - : : : : +- Project (115) - : : : : +- ShuffledHashJoin Inner BuildLeft (114) - : : : : :- Exchange (109) - : : : : : +- Filter (108) - : : : : : +- Scan parquet (107) - : : : : +- Exchange (113) - : : : : +- Project (112) - : : : : +- Filter (111) - : : : : +- Scan parquet (110) - : : : +- Exchange (119) - : : : +- Filter (118) - : : : +- Scan parquet (117) - : : +- Exchange (125) - : : +- Filter (124) - : : +- Scan parquet (123) - : +- Exchange (131) - : +- Filter (130) - : +- Scan parquet (129) - +- Exchange (138) - +- Project (137) - +- Filter (136) - +- Scan parquet (135) + Sort (155) + +- Exchange (154) + +- HashAggregate (153) + +- Exchange (152) + +- HashAggregate (151) + +- Project (150) + +- SortMergeJoin Inner (149) + :- Sort (143) + : +- Exchange (142) + : +- Project (141) + : +- SortMergeJoin Inner (140) + : :- Sort (135) + : : +- Exchange (134) + : : +- Project (133) + : : +- SortMergeJoin Inner (132) + : : :- Sort (127) + : : : +- Exchange (126) + : : : +- Project (125) + : : : +- SortMergeJoin Inner (124) + : : : :- Sort (119) + : : : : +- Exchange (118) + : : : : +- Project (117) + : : : : +- SortMergeJoin Inner (116) + : : : : :- Sort (110) + : : : : : +- Exchange (109) + : : : : : +- Filter (108) + : : : : : +- Scan parquet (107) + : : : : +- Sort (115) + : : : : +- Exchange (114) + : : : : +- Project (113) + : : : : +- Filter (112) + : : : : +- Scan parquet (111) + : : : +- Sort (123) + : : : +- Exchange (122) + : : : +- Filter (121) + : : : +- Scan parquet (120) + : : +- Sort (131) + : : +- Exchange (130) + : : +- Filter (129) + : : +- Scan parquet (128) + : +- Sort (139) + : +- Exchange (138) + : +- Filter (137) + : +- Scan parquet (136) + +- Sort (148) + +- Exchange (147) + +- Project (146) + +- Filter (145) + +- Scan parquet (144) (1) Scan parquet @@ -567,176 +577,216 @@ Condition : (isnotnull(c_custkey#X) AND isnotnull(c_nationkey#X)) Input [2]: [c_custkey#X, c_nationkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(110) Scan parquet +(110) Sort +Input [2]: [c_custkey#X, c_nationkey#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(111) Scan parquet Output [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderdate), GreaterThanOrEqual(o_orderdate,1994-01-01), LessThan(o_orderdate,1995-01-01), IsNotNull(o_custkey), IsNotNull(o_orderkey)] ReadSchema: struct -(111) Filter +(112) Filter Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Condition : ((((isnotnull(o_orderdate#X) AND (o_orderdate#X >= 1994-01-01)) AND (o_orderdate#X < 1995-01-01)) AND isnotnull(o_custkey#X)) AND isnotnull(o_orderkey#X)) -(112) Project +(113) Project Output [2]: [o_orderkey#X, o_custkey#X] Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] -(113) Exchange +(114) Exchange Input [2]: [o_orderkey#X, o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(114) ShuffledHashJoin +(115) Sort +Input [2]: [o_orderkey#X, o_custkey#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(116) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join condition: None -(115) Project +(117) Project Output [2]: [c_nationkey#X, o_orderkey#X] Input [4]: [c_custkey#X, c_nationkey#X, o_orderkey#X, o_custkey#X] -(116) Exchange +(118) Exchange Input [2]: [c_nationkey#X, o_orderkey#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(117) Scan parquet +(119) Sort +Input [2]: [c_nationkey#X, o_orderkey#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(120) Scan parquet Output [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_orderkey), IsNotNull(l_suppkey)] ReadSchema: struct -(118) Filter +(121) Filter Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Condition : (isnotnull(l_orderkey#X) AND isnotnull(l_suppkey#X)) -(119) Exchange +(122) Exchange Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(120) ShuffledHashJoin +(123) Sort +Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(124) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(121) Project +(125) Project Output [4]: [c_nationkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Input [6]: [c_nationkey#X, o_orderkey#X, l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] -(122) Exchange +(126) Exchange Input [4]: [c_nationkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_suppkey#X, c_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(123) Scan parquet +(127) Sort +Input [4]: [c_nationkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_suppkey#X ASC NULLS FIRST, c_nationkey#X ASC NULLS FIRST], false, 0 + +(128) Scan parquet Output [2]: [s_suppkey#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct -(124) Filter +(129) Filter Input [2]: [s_suppkey#X, s_nationkey#X] Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) -(125) Exchange +(130) Exchange Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(126) ShuffledHashJoin +(131) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST, s_nationkey#X ASC NULLS FIRST], false, 0 + +(132) SortMergeJoin Left keys [2]: [l_suppkey#X, c_nationkey#X] Right keys [2]: [s_suppkey#X, s_nationkey#X] Join condition: None -(127) Project +(133) Project Output [3]: [l_extendedprice#X, l_discount#X, s_nationkey#X] Input [6]: [c_nationkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, s_suppkey#X, s_nationkey#X] -(128) Exchange +(134) Exchange Input [3]: [l_extendedprice#X, l_discount#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(129) Scan parquet +(135) Sort +Input [3]: [l_extendedprice#X, l_discount#X, s_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(136) Scan parquet Output [3]: [n_nationkey#X, n_name#X, n_regionkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey), IsNotNull(n_regionkey)] ReadSchema: struct -(130) Filter +(137) Filter Input [3]: [n_nationkey#X, n_name#X, n_regionkey#X] Condition : (isnotnull(n_nationkey#X) AND isnotnull(n_regionkey#X)) -(131) Exchange +(138) Exchange Input [3]: [n_nationkey#X, n_name#X, n_regionkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(132) ShuffledHashJoin +(139) Sort +Input [3]: [n_nationkey#X, n_name#X, n_regionkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(140) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(133) Project +(141) Project Output [4]: [l_extendedprice#X, l_discount#X, n_name#X, n_regionkey#X] Input [6]: [l_extendedprice#X, l_discount#X, s_nationkey#X, n_nationkey#X, n_name#X, n_regionkey#X] -(134) Exchange +(142) Exchange Input [4]: [l_extendedprice#X, l_discount#X, n_name#X, n_regionkey#X] Arguments: hashpartitioning(n_regionkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(135) Scan parquet +(143) Sort +Input [4]: [l_extendedprice#X, l_discount#X, n_name#X, n_regionkey#X] +Arguments: [n_regionkey#X ASC NULLS FIRST], false, 0 + +(144) Scan parquet Output [2]: [r_regionkey#X, r_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(r_name), EqualTo(r_name,ASIA), IsNotNull(r_regionkey)] ReadSchema: struct -(136) Filter +(145) Filter Input [2]: [r_regionkey#X, r_name#X] Condition : ((isnotnull(r_name#X) AND (r_name#X = ASIA)) AND isnotnull(r_regionkey#X)) -(137) Project +(146) Project Output [1]: [r_regionkey#X] Input [2]: [r_regionkey#X, r_name#X] -(138) Exchange +(147) Exchange Input [1]: [r_regionkey#X] Arguments: hashpartitioning(r_regionkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(139) ShuffledHashJoin +(148) Sort +Input [1]: [r_regionkey#X] +Arguments: [r_regionkey#X ASC NULLS FIRST], false, 0 + +(149) SortMergeJoin Left keys [1]: [n_regionkey#X] Right keys [1]: [r_regionkey#X] Join condition: None -(140) Project +(150) Project Output [3]: [l_extendedprice#X, l_discount#X, n_name#X] Input [5]: [l_extendedprice#X, l_discount#X, n_name#X, n_regionkey#X, r_regionkey#X] -(141) HashAggregate +(151) HashAggregate Input [3]: [l_extendedprice#X, l_discount#X, n_name#X] Keys [1]: [n_name#X] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [n_name#X, sum#X, isEmpty#X] -(142) Exchange +(152) Exchange Input [3]: [n_name#X, sum#X, isEmpty#X] Arguments: hashpartitioning(n_name#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(143) HashAggregate +(153) HashAggregate Input [3]: [n_name#X, sum#X, isEmpty#X] Keys [1]: [n_name#X] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] Results [2]: [n_name#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X AS revenue#X] -(144) Exchange +(154) Exchange Input [2]: [n_name#X, revenue#X] Arguments: rangepartitioning(revenue#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(145) Sort +(155) Sort Input [2]: [n_name#X, revenue#X] Arguments: [revenue#X DESC NULLS LAST], true, 0 -(146) AdaptiveSparkPlan +(156) AdaptiveSparkPlan Output [2]: [n_name#X, revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/7.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/7.txt index 71742ea423b5..651cfa840be8 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/7.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/7.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (139) +AdaptiveSparkPlan (149) +- == Final Plan == VeloxColumnarToRowExec (101) +- ^ SortExecTransformer (99) @@ -79,43 +79,53 @@ AdaptiveSparkPlan (139) +- ShuffleQueryStage (79), Statistics(X) +- ReusedExchange (78) +- == Initial Plan == - Sort (138) - +- Exchange (137) - +- HashAggregate (136) - +- Exchange (135) - +- HashAggregate (134) - +- Project (133) - +- ShuffledHashJoin Inner BuildRight (132) - :- Exchange (128) - : +- Project (127) - : +- ShuffledHashJoin Inner BuildRight (126) - : :- Exchange (122) - : : +- Project (121) - : : +- ShuffledHashJoin Inner BuildRight (120) - : : :- Exchange (116) - : : : +- Project (115) - : : : +- ShuffledHashJoin Inner BuildRight (114) - : : : :- Exchange (110) - : : : : +- Project (109) - : : : : +- ShuffledHashJoin Inner BuildLeft (108) - : : : : :- Exchange (104) - : : : : : +- Filter (103) - : : : : : +- Scan parquet (102) - : : : : +- Exchange (107) - : : : : +- Filter (106) - : : : : +- Scan parquet (105) - : : : +- Exchange (113) - : : : +- Filter (112) - : : : +- Scan parquet (111) - : : +- Exchange (119) - : : +- Filter (118) - : : +- Scan parquet (117) - : +- Exchange (125) - : +- Filter (124) - : +- Scan parquet (123) - +- Exchange (131) - +- Filter (130) - +- Scan parquet (129) + Sort (148) + +- Exchange (147) + +- HashAggregate (146) + +- Exchange (145) + +- HashAggregate (144) + +- Project (143) + +- SortMergeJoin Inner (142) + :- Sort (137) + : +- Exchange (136) + : +- Project (135) + : +- SortMergeJoin Inner (134) + : :- Sort (129) + : : +- Exchange (128) + : : +- Project (127) + : : +- SortMergeJoin Inner (126) + : : :- Sort (121) + : : : +- Exchange (120) + : : : +- Project (119) + : : : +- SortMergeJoin Inner (118) + : : : :- Sort (113) + : : : : +- Exchange (112) + : : : : +- Project (111) + : : : : +- SortMergeJoin Inner (110) + : : : : :- Sort (105) + : : : : : +- Exchange (104) + : : : : : +- Filter (103) + : : : : : +- Scan parquet (102) + : : : : +- Sort (109) + : : : : +- Exchange (108) + : : : : +- Filter (107) + : : : : +- Scan parquet (106) + : : : +- Sort (117) + : : : +- Exchange (116) + : : : +- Filter (115) + : : : +- Scan parquet (114) + : : +- Sort (125) + : : +- Exchange (124) + : : +- Filter (123) + : : +- Scan parquet (122) + : +- Sort (133) + : +- Exchange (132) + : +- Filter (131) + : +- Scan parquet (130) + +- Sort (141) + +- Exchange (140) + +- Filter (139) + +- Scan parquet (138) (1) Scan parquet @@ -537,168 +547,208 @@ Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(105) Scan parquet +(105) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(106) Scan parquet Output [5]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1995-01-01), LessThanOrEqual(l_shipdate,1996-12-31), IsNotNull(l_suppkey), IsNotNull(l_orderkey)] ReadSchema: struct -(106) Filter +(107) Filter Input [5]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : ((((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1995-01-01)) AND (l_shipdate#X <= 1996-12-31)) AND isnotnull(l_suppkey#X)) AND isnotnull(l_orderkey#X)) -(107) Exchange +(108) Exchange Input [5]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(108) ShuffledHashJoin +(109) Sort +Input [5]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Arguments: [l_suppkey#X ASC NULLS FIRST], false, 0 + +(110) SortMergeJoin Left keys [1]: [s_suppkey#X] Right keys [1]: [l_suppkey#X] Join condition: None -(109) Project +(111) Project Output [5]: [s_nationkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Input [7]: [s_suppkey#X, s_nationkey#X, l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(110) Exchange +(112) Exchange Input [5]: [s_nationkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(111) Scan parquet +(113) Sort +Input [5]: [s_nationkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(114) Scan parquet Output [2]: [o_orderkey#X, o_custkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderkey), IsNotNull(o_custkey)] ReadSchema: struct -(112) Filter +(115) Filter Input [2]: [o_orderkey#X, o_custkey#X] Condition : (isnotnull(o_orderkey#X) AND isnotnull(o_custkey#X)) -(113) Exchange +(116) Exchange Input [2]: [o_orderkey#X, o_custkey#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(114) ShuffledHashJoin +(117) Sort +Input [2]: [o_orderkey#X, o_custkey#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(118) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [o_orderkey#X] Join condition: None -(115) Project +(119) Project Output [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_custkey#X] Input [7]: [s_nationkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_orderkey#X, o_custkey#X] -(116) Exchange +(120) Exchange Input [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(117) Scan parquet +(121) Sort +Input [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_custkey#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(122) Scan parquet Output [2]: [c_custkey#X, c_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(c_custkey), IsNotNull(c_nationkey)] ReadSchema: struct -(118) Filter +(123) Filter Input [2]: [c_custkey#X, c_nationkey#X] Condition : (isnotnull(c_custkey#X) AND isnotnull(c_nationkey#X)) -(119) Exchange +(124) Exchange Input [2]: [c_custkey#X, c_nationkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(120) ShuffledHashJoin +(125) Sort +Input [2]: [c_custkey#X, c_nationkey#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(126) SortMergeJoin Left keys [1]: [o_custkey#X] Right keys [1]: [c_custkey#X] Join condition: None -(121) Project +(127) Project Output [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X] Input [7]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_custkey#X, c_custkey#X, c_nationkey#X] -(122) Exchange +(128) Exchange Input [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(123) Scan parquet +(129) Sort +Input [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(130) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey), Or(EqualTo(n_name,FRANCE),EqualTo(n_name,GERMANY))] ReadSchema: struct -(124) Filter +(131) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : (isnotnull(n_nationkey#X) AND ((n_name#X = FRANCE) OR (n_name#X = GERMANY))) -(125) Exchange +(132) Exchange Input [2]: [n_nationkey#X, n_name#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(126) ShuffledHashJoin +(133) Sort +Input [2]: [n_nationkey#X, n_name#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(134) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(127) Project +(135) Project Output [5]: [l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_name#X] Input [7]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_nationkey#X, n_name#X] -(128) Exchange +(136) Exchange Input [5]: [l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_name#X] Arguments: hashpartitioning(c_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(129) Scan parquet +(137) Sort +Input [5]: [l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_name#X] +Arguments: [c_nationkey#X ASC NULLS FIRST], false, 0 + +(138) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey), Or(EqualTo(n_name,GERMANY),EqualTo(n_name,FRANCE))] ReadSchema: struct -(130) Filter +(139) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : (isnotnull(n_nationkey#X) AND ((n_name#X = GERMANY) OR (n_name#X = FRANCE))) -(131) Exchange +(140) Exchange Input [2]: [n_nationkey#X, n_name#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(132) ShuffledHashJoin +(141) Sort +Input [2]: [n_nationkey#X, n_name#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(142) SortMergeJoin Left keys [1]: [c_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: (((n_name#X = FRANCE) AND (n_name#X = GERMANY)) OR ((n_name#X = GERMANY) AND (n_name#X = FRANCE))) -(133) Project +(143) Project Output [4]: [n_name#X AS supp_nation#X, n_name#X AS cust_nation#X, year(l_shipdate#X) AS l_year#X, CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)) AS volume#X] Input [7]: [l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_name#X, n_nationkey#X, n_name#X] -(134) HashAggregate +(144) HashAggregate Input [4]: [supp_nation#X, cust_nation#X, l_year#X, volume#X] Keys [3]: [supp_nation#X, cust_nation#X, l_year#X] Functions [1]: [partial_sum(volume#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [5]: [supp_nation#X, cust_nation#X, l_year#X, sum#X, isEmpty#X] -(135) Exchange +(145) Exchange Input [5]: [supp_nation#X, cust_nation#X, l_year#X, sum#X, isEmpty#X] Arguments: hashpartitioning(supp_nation#X, cust_nation#X, l_year#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(136) HashAggregate +(146) HashAggregate Input [5]: [supp_nation#X, cust_nation#X, l_year#X, sum#X, isEmpty#X] Keys [3]: [supp_nation#X, cust_nation#X, l_year#X] Functions [1]: [sum(volume#X)] Aggregate Attributes [1]: [sum(volume#X)#X] Results [4]: [supp_nation#X, cust_nation#X, l_year#X, sum(volume#X)#X AS revenue#X] -(137) Exchange +(147) Exchange Input [4]: [supp_nation#X, cust_nation#X, l_year#X, revenue#X] Arguments: rangepartitioning(supp_nation#X ASC NULLS FIRST, cust_nation#X ASC NULLS FIRST, l_year#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(138) Sort +(148) Sort Input [4]: [supp_nation#X, cust_nation#X, l_year#X, revenue#X] Arguments: [supp_nation#X ASC NULLS FIRST, cust_nation#X ASC NULLS FIRST, l_year#X ASC NULLS FIRST], true, 0 -(139) AdaptiveSparkPlan +(149) AdaptiveSparkPlan Output [4]: [supp_nation#X, cust_nation#X, l_year#X, revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/8.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/8.txt index 319e6c9f1b21..e359f4c944e7 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/8.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/8.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (193) +AdaptiveSparkPlan (207) +- == Final Plan == VeloxColumnarToRowExec (141) +- ^ SortExecTransformer (139) @@ -110,57 +110,71 @@ AdaptiveSparkPlan (193) +- ^ NoopFilter (113) +- ^ Scan parquet (112) +- == Initial Plan == - Sort (192) - +- Exchange (191) - +- HashAggregate (190) - +- Exchange (189) - +- HashAggregate (188) - +- Project (187) - +- ShuffledHashJoin Inner BuildRight (186) - :- Exchange (181) - : +- Project (180) - : +- ShuffledHashJoin Inner BuildRight (179) - : :- Exchange (175) - : : +- Project (174) - : : +- ShuffledHashJoin Inner BuildRight (173) - : : :- Exchange (169) - : : : +- Project (168) - : : : +- ShuffledHashJoin Inner BuildRight (167) - : : : :- Exchange (163) - : : : : +- Project (162) - : : : : +- ShuffledHashJoin Inner BuildRight (161) - : : : : :- Exchange (157) - : : : : : +- Project (156) - : : : : : +- ShuffledHashJoin Inner BuildRight (155) - : : : : : :- Exchange (151) - : : : : : : +- Project (150) - : : : : : : +- ShuffledHashJoin Inner BuildLeft (149) - : : : : : : :- Exchange (145) - : : : : : : : +- Project (144) - : : : : : : : +- Filter (143) - : : : : : : : +- Scan parquet (142) - : : : : : : +- Exchange (148) - : : : : : : +- Filter (147) - : : : : : : +- Scan parquet (146) - : : : : : +- Exchange (154) - : : : : : +- Filter (153) - : : : : : +- Scan parquet (152) - : : : : +- Exchange (160) - : : : : +- Filter (159) - : : : : +- Scan parquet (158) - : : : +- Exchange (166) - : : : +- Filter (165) - : : : +- Scan parquet (164) - : : +- Exchange (172) - : : +- Filter (171) - : : +- Scan parquet (170) - : +- Exchange (178) - : +- Filter (177) - : +- Scan parquet (176) - +- Exchange (185) - +- Project (184) - +- Filter (183) - +- Scan parquet (182) + Sort (206) + +- Exchange (205) + +- HashAggregate (204) + +- Exchange (203) + +- HashAggregate (202) + +- Project (201) + +- SortMergeJoin Inner (200) + :- Sort (194) + : +- Exchange (193) + : +- Project (192) + : +- SortMergeJoin Inner (191) + : :- Sort (186) + : : +- Exchange (185) + : : +- Project (184) + : : +- SortMergeJoin Inner (183) + : : :- Sort (178) + : : : +- Exchange (177) + : : : +- Project (176) + : : : +- SortMergeJoin Inner (175) + : : : :- Sort (170) + : : : : +- Exchange (169) + : : : : +- Project (168) + : : : : +- SortMergeJoin Inner (167) + : : : : :- Sort (162) + : : : : : +- Exchange (161) + : : : : : +- Project (160) + : : : : : +- SortMergeJoin Inner (159) + : : : : : :- Sort (154) + : : : : : : +- Exchange (153) + : : : : : : +- Project (152) + : : : : : : +- SortMergeJoin Inner (151) + : : : : : : :- Sort (146) + : : : : : : : +- Exchange (145) + : : : : : : : +- Project (144) + : : : : : : : +- Filter (143) + : : : : : : : +- Scan parquet (142) + : : : : : : +- Sort (150) + : : : : : : +- Exchange (149) + : : : : : : +- Filter (148) + : : : : : : +- Scan parquet (147) + : : : : : +- Sort (158) + : : : : : +- Exchange (157) + : : : : : +- Filter (156) + : : : : : +- Scan parquet (155) + : : : : +- Sort (166) + : : : : +- Exchange (165) + : : : : +- Filter (164) + : : : : +- Scan parquet (163) + : : : +- Sort (174) + : : : +- Exchange (173) + : : : +- Filter (172) + : : : +- Scan parquet (171) + : : +- Sort (182) + : : +- Exchange (181) + : : +- Filter (180) + : : +- Scan parquet (179) + : +- Sort (190) + : +- Exchange (189) + : +- Filter (188) + : +- Scan parquet (187) + +- Sort (199) + +- Exchange (198) + +- Project (197) + +- Filter (196) + +- Scan parquet (195) (1) Scan parquet @@ -750,228 +764,284 @@ Input [2]: [p_partkey#X, p_type#X] Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(146) Scan parquet +(146) Sort +Input [1]: [p_partkey#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(147) Scan parquet Output [5]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_partkey), IsNotNull(l_suppkey), IsNotNull(l_orderkey)] ReadSchema: struct -(147) Filter +(148) Filter Input [5]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Condition : ((isnotnull(l_partkey#X) AND isnotnull(l_suppkey#X)) AND isnotnull(l_orderkey#X)) -(148) Exchange +(149) Exchange Input [5]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(149) ShuffledHashJoin +(150) Sort +Input [5]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(151) SortMergeJoin Left keys [1]: [p_partkey#X] Right keys [1]: [l_partkey#X] Join condition: None -(150) Project +(152) Project Output [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Input [6]: [p_partkey#X, l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] -(151) Exchange +(153) Exchange Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(152) Scan parquet +(154) Sort +Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_suppkey#X ASC NULLS FIRST], false, 0 + +(155) Scan parquet Output [2]: [s_suppkey#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct -(153) Filter +(156) Filter Input [2]: [s_suppkey#X, s_nationkey#X] Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) -(154) Exchange +(157) Exchange Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(155) ShuffledHashJoin +(158) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(159) SortMergeJoin Left keys [1]: [l_suppkey#X] Right keys [1]: [s_suppkey#X] Join condition: None -(156) Project +(160) Project Output [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, s_nationkey#X] Input [6]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, s_suppkey#X, s_nationkey#X] -(157) Exchange +(161) Exchange Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, s_nationkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(158) Scan parquet +(162) Sort +Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, s_nationkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(163) Scan parquet Output [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderdate), GreaterThanOrEqual(o_orderdate,1995-01-01), LessThanOrEqual(o_orderdate,1996-12-31), IsNotNull(o_orderkey), IsNotNull(o_custkey)] ReadSchema: struct -(159) Filter +(164) Filter Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Condition : ((((isnotnull(o_orderdate#X) AND (o_orderdate#X >= 1995-01-01)) AND (o_orderdate#X <= 1996-12-31)) AND isnotnull(o_orderkey#X)) AND isnotnull(o_custkey#X)) -(160) Exchange +(165) Exchange Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(161) ShuffledHashJoin +(166) Sort +Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(167) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [o_orderkey#X] Join condition: None -(162) Project +(168) Project Output [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_custkey#X, o_orderdate#X] Input [7]: [l_orderkey#X, l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderkey#X, o_custkey#X, o_orderdate#X] -(163) Exchange +(169) Exchange Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_custkey#X, o_orderdate#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(164) Scan parquet +(170) Sort +Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_custkey#X, o_orderdate#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(171) Scan parquet Output [2]: [c_custkey#X, c_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(c_custkey), IsNotNull(c_nationkey)] ReadSchema: struct -(165) Filter +(172) Filter Input [2]: [c_custkey#X, c_nationkey#X] Condition : (isnotnull(c_custkey#X) AND isnotnull(c_nationkey#X)) -(166) Exchange +(173) Exchange Input [2]: [c_custkey#X, c_nationkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(167) ShuffledHashJoin +(174) Sort +Input [2]: [c_custkey#X, c_nationkey#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(175) SortMergeJoin Left keys [1]: [o_custkey#X] Right keys [1]: [c_custkey#X] Join condition: None -(168) Project +(176) Project Output [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, c_nationkey#X] Input [7]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_custkey#X, o_orderdate#X, c_custkey#X, c_nationkey#X] -(169) Exchange +(177) Exchange Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, c_nationkey#X] Arguments: hashpartitioning(c_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(170) Scan parquet +(178) Sort +Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, c_nationkey#X] +Arguments: [c_nationkey#X ASC NULLS FIRST], false, 0 + +(179) Scan parquet Output [2]: [n_nationkey#X, n_regionkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey), IsNotNull(n_regionkey)] ReadSchema: struct -(171) Filter +(180) Filter Input [2]: [n_nationkey#X, n_regionkey#X] Condition : (isnotnull(n_nationkey#X) AND isnotnull(n_regionkey#X)) -(172) Exchange +(181) Exchange Input [2]: [n_nationkey#X, n_regionkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(173) ShuffledHashJoin +(182) Sort +Input [2]: [n_nationkey#X, n_regionkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(183) SortMergeJoin Left keys [1]: [c_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(174) Project +(184) Project Output [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, n_regionkey#X] Input [7]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, c_nationkey#X, n_nationkey#X, n_regionkey#X] -(175) Exchange +(185) Exchange Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, n_regionkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(176) Scan parquet +(186) Sort +Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, n_regionkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(187) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey)] ReadSchema: struct -(177) Filter +(188) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : isnotnull(n_nationkey#X) -(178) Exchange +(189) Exchange Input [2]: [n_nationkey#X, n_name#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(179) ShuffledHashJoin +(190) Sort +Input [2]: [n_nationkey#X, n_name#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(191) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(180) Project +(192) Project Output [5]: [l_extendedprice#X, l_discount#X, o_orderdate#X, n_regionkey#X, n_name#X] Input [7]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, n_regionkey#X, n_nationkey#X, n_name#X] -(181) Exchange +(193) Exchange Input [5]: [l_extendedprice#X, l_discount#X, o_orderdate#X, n_regionkey#X, n_name#X] Arguments: hashpartitioning(n_regionkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(182) Scan parquet +(194) Sort +Input [5]: [l_extendedprice#X, l_discount#X, o_orderdate#X, n_regionkey#X, n_name#X] +Arguments: [n_regionkey#X ASC NULLS FIRST], false, 0 + +(195) Scan parquet Output [2]: [r_regionkey#X, r_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(r_name), EqualTo(r_name,AMERICA), IsNotNull(r_regionkey)] ReadSchema: struct -(183) Filter +(196) Filter Input [2]: [r_regionkey#X, r_name#X] Condition : ((isnotnull(r_name#X) AND (r_name#X = AMERICA)) AND isnotnull(r_regionkey#X)) -(184) Project +(197) Project Output [1]: [r_regionkey#X] Input [2]: [r_regionkey#X, r_name#X] -(185) Exchange +(198) Exchange Input [1]: [r_regionkey#X] Arguments: hashpartitioning(r_regionkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(186) ShuffledHashJoin +(199) Sort +Input [1]: [r_regionkey#X] +Arguments: [r_regionkey#X ASC NULLS FIRST], false, 0 + +(200) SortMergeJoin Left keys [1]: [n_regionkey#X] Right keys [1]: [r_regionkey#X] Join condition: None -(187) Project +(201) Project Output [3]: [year(o_orderdate#X) AS o_year#X, CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)) AS volume#X, n_name#X AS nation#X] Input [6]: [l_extendedprice#X, l_discount#X, o_orderdate#X, n_regionkey#X, n_name#X, r_regionkey#X] -(188) HashAggregate +(202) HashAggregate Input [3]: [o_year#X, volume#X, nation#X] Keys [1]: [o_year#X] Functions [2]: [partial_sum(CASE WHEN (nation#X = BRAZIL) THEN volume#X ELSE 0.0000 END), partial_sum(volume#X)] Aggregate Attributes [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] Results [5]: [o_year#X, sum#X, isEmpty#X, sum#X, isEmpty#X] -(189) Exchange +(203) Exchange Input [5]: [o_year#X, sum#X, isEmpty#X, sum#X, isEmpty#X] Arguments: hashpartitioning(o_year#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(190) HashAggregate +(204) HashAggregate Input [5]: [o_year#X, sum#X, isEmpty#X, sum#X, isEmpty#X] Keys [1]: [o_year#X] Functions [2]: [sum(CASE WHEN (nation#X = BRAZIL) THEN volume#X ELSE 0.0000 END), sum(volume#X)] Aggregate Attributes [2]: [sum(CASE WHEN (nation#X = BRAZIL) THEN volume#X ELSE 0.0000 END)#X, sum(volume#X)#X] Results [2]: [o_year#X, CheckOverflow((promote_precision(sum(CASE WHEN (nation#X = BRAZIL) THEN volume#X ELSE 0.0000 END)#X) / promote_precision(sum(volume#X)#X)), DecimalType(38,6)) AS mkt_share#X] -(191) Exchange +(205) Exchange Input [2]: [o_year#X, mkt_share#X] Arguments: rangepartitioning(o_year#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(192) Sort +(206) Sort Input [2]: [o_year#X, mkt_share#X] Arguments: [o_year#X ASC NULLS FIRST], true, 0 -(193) AdaptiveSparkPlan +(207) AdaptiveSparkPlan Output [2]: [o_year#X, mkt_share#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/9.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/9.txt index 40dee1752399..21c91ca14180 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/9.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark33/9.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (145) +AdaptiveSparkPlan (155) +- == Final Plan == VeloxColumnarToRowExec (106) +- ^ SortExecTransformer (104) @@ -83,44 +83,54 @@ AdaptiveSparkPlan (145) +- ^ NoopFilter (79) +- ^ Scan parquet (78) +- == Initial Plan == - Sort (144) - +- Exchange (143) - +- HashAggregate (142) - +- Exchange (141) - +- HashAggregate (140) - +- Project (139) - +- ShuffledHashJoin Inner BuildRight (138) - :- Exchange (134) - : +- Project (133) - : +- ShuffledHashJoin Inner BuildRight (132) - : :- Exchange (128) - : : +- Project (127) - : : +- ShuffledHashJoin Inner BuildRight (126) - : : :- Exchange (122) - : : : +- Project (121) - : : : +- ShuffledHashJoin Inner BuildRight (120) - : : : :- Exchange (116) - : : : : +- Project (115) - : : : : +- ShuffledHashJoin Inner BuildLeft (114) - : : : : :- Exchange (110) - : : : : : +- Project (109) - : : : : : +- Filter (108) - : : : : : +- Scan parquet (107) - : : : : +- Exchange (113) - : : : : +- Filter (112) - : : : : +- Scan parquet (111) - : : : +- Exchange (119) - : : : +- Filter (118) - : : : +- Scan parquet (117) - : : +- Exchange (125) - : : +- Filter (124) - : : +- Scan parquet (123) - : +- Exchange (131) - : +- Filter (130) - : +- Scan parquet (129) - +- Exchange (137) - +- Filter (136) - +- Scan parquet (135) + Sort (154) + +- Exchange (153) + +- HashAggregate (152) + +- Exchange (151) + +- HashAggregate (150) + +- Project (149) + +- SortMergeJoin Inner (148) + :- Sort (143) + : +- Exchange (142) + : +- Project (141) + : +- SortMergeJoin Inner (140) + : :- Sort (135) + : : +- Exchange (134) + : : +- Project (133) + : : +- SortMergeJoin Inner (132) + : : :- Sort (127) + : : : +- Exchange (126) + : : : +- Project (125) + : : : +- SortMergeJoin Inner (124) + : : : :- Sort (119) + : : : : +- Exchange (118) + : : : : +- Project (117) + : : : : +- SortMergeJoin Inner (116) + : : : : :- Sort (111) + : : : : : +- Exchange (110) + : : : : : +- Project (109) + : : : : : +- Filter (108) + : : : : : +- Scan parquet (107) + : : : : +- Sort (115) + : : : : +- Exchange (114) + : : : : +- Filter (113) + : : : : +- Scan parquet (112) + : : : +- Sort (123) + : : : +- Exchange (122) + : : : +- Filter (121) + : : : +- Scan parquet (120) + : : +- Sort (131) + : : +- Exchange (130) + : : +- Filter (129) + : : +- Scan parquet (128) + : +- Sort (139) + : +- Exchange (138) + : +- Filter (137) + : +- Scan parquet (136) + +- Sort (147) + +- Exchange (146) + +- Filter (145) + +- Scan parquet (144) (1) Scan parquet @@ -570,168 +580,208 @@ Input [2]: [p_partkey#X, p_name#X] Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(111) Scan parquet +(111) Sort +Input [1]: [p_partkey#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(112) Scan parquet Output [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_partkey), IsNotNull(l_suppkey), IsNotNull(l_orderkey)] ReadSchema: struct -(112) Filter +(113) Filter Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Condition : ((isnotnull(l_partkey#X) AND isnotnull(l_suppkey#X)) AND isnotnull(l_orderkey#X)) -(113) Exchange +(114) Exchange Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(114) ShuffledHashJoin +(115) Sort +Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(116) SortMergeJoin Left keys [1]: [p_partkey#X] Right keys [1]: [l_partkey#X] Join condition: None -(115) Project +(117) Project Output [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Input [7]: [p_partkey#X, l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] -(116) Exchange +(118) Exchange Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(117) Scan parquet +(119) Sort +Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] +Arguments: [l_suppkey#X ASC NULLS FIRST], false, 0 + +(120) Scan parquet Output [2]: [s_suppkey#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct -(118) Filter +(121) Filter Input [2]: [s_suppkey#X, s_nationkey#X] Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) -(119) Exchange +(122) Exchange Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(120) ShuffledHashJoin +(123) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(124) SortMergeJoin Left keys [1]: [l_suppkey#X] Right keys [1]: [s_suppkey#X] Join condition: None -(121) Project +(125) Project Output [7]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X] Input [8]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_suppkey#X, s_nationkey#X] -(122) Exchange +(126) Exchange Input [7]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X] Arguments: hashpartitioning(l_suppkey#X, l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(123) Scan parquet +(127) Sort +Input [7]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X] +Arguments: [l_suppkey#X ASC NULLS FIRST, l_partkey#X ASC NULLS FIRST], false, 0 + +(128) Scan parquet Output [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(ps_suppkey), IsNotNull(ps_partkey)] ReadSchema: struct -(124) Filter +(129) Filter Input [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] Condition : (isnotnull(ps_suppkey#X) AND isnotnull(ps_partkey#X)) -(125) Exchange +(130) Exchange Input [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] Arguments: hashpartitioning(ps_suppkey#X, ps_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(126) ShuffledHashJoin +(131) Sort +Input [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] +Arguments: [ps_suppkey#X ASC NULLS FIRST, ps_partkey#X ASC NULLS FIRST], false, 0 + +(132) SortMergeJoin Left keys [2]: [l_suppkey#X, l_partkey#X] Right keys [2]: [ps_suppkey#X, ps_partkey#X] Join condition: None -(127) Project +(133) Project Output [6]: [l_orderkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X] Input [10]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_partkey#X, ps_suppkey#X, ps_supplycost#X] -(128) Exchange +(134) Exchange Input [6]: [l_orderkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(129) Scan parquet +(135) Sort +Input [6]: [l_orderkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(136) Scan parquet Output [2]: [o_orderkey#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderkey)] ReadSchema: struct -(130) Filter +(137) Filter Input [2]: [o_orderkey#X, o_orderdate#X] Condition : isnotnull(o_orderkey#X) -(131) Exchange +(138) Exchange Input [2]: [o_orderkey#X, o_orderdate#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(132) ShuffledHashJoin +(139) Sort +Input [2]: [o_orderkey#X, o_orderdate#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(140) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [o_orderkey#X] Join condition: None -(133) Project +(141) Project Output [6]: [l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderdate#X] Input [8]: [l_orderkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderkey#X, o_orderdate#X] -(134) Exchange +(142) Exchange Input [6]: [l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderdate#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(135) Scan parquet +(143) Sort +Input [6]: [l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderdate#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(144) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey)] ReadSchema: struct -(136) Filter +(145) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : isnotnull(n_nationkey#X) -(137) Exchange +(146) Exchange Input [2]: [n_nationkey#X, n_name#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(138) ShuffledHashJoin +(147) Sort +Input [2]: [n_nationkey#X, n_name#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(148) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(139) Project +(149) Project Output [3]: [n_name#X AS nation#X, year(o_orderdate#X) AS o_year#X, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)) as decimal(27,4))) - promote_precision(cast(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(l_quantity#X)), DecimalType(25,4)) as decimal(27,4)))), DecimalType(27,4)) AS amount#X] Input [8]: [l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderdate#X, n_nationkey#X, n_name#X] -(140) HashAggregate +(150) HashAggregate Input [3]: [nation#X, o_year#X, amount#X] Keys [2]: [nation#X, o_year#X] Functions [1]: [partial_sum(amount#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [4]: [nation#X, o_year#X, sum#X, isEmpty#X] -(141) Exchange +(151) Exchange Input [4]: [nation#X, o_year#X, sum#X, isEmpty#X] Arguments: hashpartitioning(nation#X, o_year#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(142) HashAggregate +(152) HashAggregate Input [4]: [nation#X, o_year#X, sum#X, isEmpty#X] Keys [2]: [nation#X, o_year#X] Functions [1]: [sum(amount#X)] Aggregate Attributes [1]: [sum(amount#X)#X] Results [3]: [nation#X, o_year#X, sum(amount#X)#X AS sum_profit#X] -(143) Exchange +(153) Exchange Input [3]: [nation#X, o_year#X, sum_profit#X] Arguments: rangepartitioning(nation#X ASC NULLS FIRST, o_year#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(144) Sort +(154) Sort Input [3]: [nation#X, o_year#X, sum_profit#X] Arguments: [nation#X ASC NULLS FIRST, o_year#X DESC NULLS LAST], true, 0 -(145) AdaptiveSparkPlan +(155) AdaptiveSparkPlan Output [3]: [nation#X, o_year#X, sum_profit#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/10.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/10.txt index 94e1100ea37d..5e48ceb742d7 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/10.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/10.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (94) +AdaptiveSparkPlan (100) +- == Final Plan == VeloxColumnarToRowExec (67) +- TakeOrderedAndProjectExecTransformer (66) @@ -54,32 +54,38 @@ AdaptiveSparkPlan (94) +- ^ NoopFilter (45) +- ^ Scan parquet (44) +- == Initial Plan == - TakeOrderedAndProject (93) - +- HashAggregate (92) - +- Exchange (91) - +- HashAggregate (90) - +- Project (89) - +- ShuffledHashJoin Inner BuildRight (88) - :- Exchange (84) - : +- Project (83) - : +- ShuffledHashJoin Inner BuildRight (82) - : :- Exchange (77) - : : +- Project (76) - : : +- ShuffledHashJoin Inner BuildRight (75) - : : :- Exchange (70) - : : : +- Filter (69) - : : : +- Scan parquet (68) - : : +- Exchange (74) - : : +- Project (73) - : : +- Filter (72) - : : +- Scan parquet (71) - : +- Exchange (81) - : +- Project (80) - : +- Filter (79) - : +- Scan parquet (78) - +- Exchange (87) - +- Filter (86) - +- Scan parquet (85) + TakeOrderedAndProject (99) + +- HashAggregate (98) + +- Exchange (97) + +- HashAggregate (96) + +- Project (95) + +- SortMergeJoin Inner (94) + :- Sort (89) + : +- Exchange (88) + : +- Project (87) + : +- SortMergeJoin Inner (86) + : :- Sort (80) + : : +- Exchange (79) + : : +- Project (78) + : : +- SortMergeJoin Inner (77) + : : :- Sort (71) + : : : +- Exchange (70) + : : : +- Filter (69) + : : : +- Scan parquet (68) + : : +- Sort (76) + : : +- Exchange (75) + : : +- Project (74) + : : +- Filter (73) + : : +- Scan parquet (72) + : +- Sort (85) + : +- Exchange (84) + : +- Project (83) + : +- Filter (82) + : +- Scan parquet (81) + +- Sort (93) + +- Exchange (92) + +- Filter (91) + +- Scan parquet (90) (1) Scan parquet @@ -374,119 +380,143 @@ Condition : (isnotnull(c_custkey#X) AND isnotnull(c_nationkey#X)) Input [7]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(71) Scan parquet +(71) Sort +Input [7]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(72) Scan parquet Output [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderdate), GreaterThanOrEqual(o_orderdate,1993-10-01), LessThan(o_orderdate,1994-01-01), IsNotNull(o_custkey), IsNotNull(o_orderkey)] ReadSchema: struct -(72) Filter +(73) Filter Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Condition : ((((isnotnull(o_orderdate#X) AND (o_orderdate#X >= 1993-10-01)) AND (o_orderdate#X < 1994-01-01)) AND isnotnull(o_custkey#X)) AND isnotnull(o_orderkey#X)) -(73) Project +(74) Project Output [2]: [o_orderkey#X, o_custkey#X] Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] -(74) Exchange +(75) Exchange Input [2]: [o_orderkey#X, o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(75) ShuffledHashJoin +(76) Sort +Input [2]: [o_orderkey#X, o_custkey#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(77) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join type: Inner Join condition: None -(76) Project +(78) Project Output [8]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X] Input [9]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X, o_custkey#X] -(77) Exchange +(79) Exchange Input [8]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(78) Scan parquet +(80) Sort +Input [8]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(81) Scan parquet Output [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_returnflag#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_returnflag), EqualTo(l_returnflag,R), IsNotNull(l_orderkey)] ReadSchema: struct -(79) Filter +(82) Filter Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_returnflag#X] Condition : ((isnotnull(l_returnflag#X) AND (l_returnflag#X = R)) AND isnotnull(l_orderkey#X)) -(80) Project +(83) Project Output [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_returnflag#X] -(81) Exchange +(84) Exchange Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(82) ShuffledHashJoin +(85) Sort +Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(86) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join type: Inner Join condition: None -(83) Project +(87) Project Output [9]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X] Input [11]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X] -(84) Exchange +(88) Exchange Input [9]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(c_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(85) Scan parquet +(89) Sort +Input [9]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X] +Arguments: [c_nationkey#X ASC NULLS FIRST], false, 0 + +(90) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey)] ReadSchema: struct -(86) Filter +(91) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : isnotnull(n_nationkey#X) -(87) Exchange +(92) Exchange Input [2]: [n_nationkey#X, n_name#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(88) ShuffledHashJoin +(93) Sort +Input [2]: [n_nationkey#X, n_name#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(94) SortMergeJoin Left keys [1]: [c_nationkey#X] Right keys [1]: [n_nationkey#X] Join type: Inner Join condition: None -(89) Project +(95) Project Output [9]: [c_custkey#X, c_name#X, c_address#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X, n_name#X] Input [11]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X, n_nationkey#X, n_name#X] -(90) HashAggregate +(96) HashAggregate Input [9]: [c_custkey#X, c_name#X, c_address#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X, n_name#X] Keys [7]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X] Functions [1]: [partial_sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [9]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, sum#X, isEmpty#X] -(91) Exchange +(97) Exchange Input [9]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, sum#X, isEmpty#X] Arguments: hashpartitioning(c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(92) HashAggregate +(98) HashAggregate Input [9]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, sum#X, isEmpty#X] Keys [7]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X] Functions [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X] Results [8]: [c_custkey#X, c_name#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X AS revenue#X, c_acctbal#X, n_name#X, c_address#X, c_phone#X, c_comment#X] -(93) TakeOrderedAndProject +(99) TakeOrderedAndProject Input [8]: [c_custkey#X, c_name#X, revenue#X, c_acctbal#X, n_name#X, c_address#X, c_phone#X, c_comment#X] Arguments: X, [revenue#X DESC NULLS LAST], [c_custkey#X, c_name#X, revenue#X, c_acctbal#X, n_name#X, c_address#X, c_phone#X, c_comment#X] -(94) AdaptiveSparkPlan +(100) AdaptiveSparkPlan Output [8]: [c_custkey#X, c_name#X, revenue#X, c_acctbal#X, n_name#X, c_address#X, c_phone#X, c_comment#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/11.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/11.txt index 41d23099319f..6c9eef6b1f1f 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/11.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/11.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (78) +AdaptiveSparkPlan (82) +- == Final Plan == VeloxColumnarToRowExec (56) +- ^ SortExecTransformer (54) @@ -45,27 +45,31 @@ AdaptiveSparkPlan (78) +- ^ NoopFilter (28) +- ^ Scan parquet (27) +- == Initial Plan == - Sort (77) - +- Exchange (76) - +- Filter (75) - +- HashAggregate (74) - +- Exchange (73) - +- HashAggregate (72) - +- Project (71) - +- ShuffledHashJoin Inner BuildRight (70) - :- Exchange (65) - : +- Project (64) - : +- ShuffledHashJoin Inner BuildRight (63) - : :- Exchange (59) - : : +- Filter (58) - : : +- Scan parquet (57) - : +- Exchange (62) - : +- Filter (61) - : +- Scan parquet (60) - +- Exchange (69) - +- Project (68) - +- Filter (67) - +- Scan parquet (66) + Sort (81) + +- Exchange (80) + +- Filter (79) + +- HashAggregate (78) + +- Exchange (77) + +- HashAggregate (76) + +- Project (75) + +- SortMergeJoin Inner (74) + :- Sort (68) + : +- Exchange (67) + : +- Project (66) + : +- SortMergeJoin Inner (65) + : :- Sort (60) + : : +- Exchange (59) + : : +- Filter (58) + : : +- Scan parquet (57) + : +- Sort (64) + : +- Exchange (63) + : +- Filter (62) + : +- Scan parquet (61) + +- Sort (73) + +- Exchange (72) + +- Project (71) + +- Filter (70) + +- Scan parquet (69) (1) Scan parquet @@ -313,365 +317,401 @@ Condition : isnotnull(ps_suppkey#X) Input [4]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(60) Scan parquet +(60) Sort +Input [4]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] +Arguments: [ps_suppkey#X ASC NULLS FIRST], false, 0 + +(61) Scan parquet Output [2]: [s_suppkey#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct -(61) Filter +(62) Filter Input [2]: [s_suppkey#X, s_nationkey#X] Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) -(62) Exchange +(63) Exchange Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(63) ShuffledHashJoin +(64) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(65) SortMergeJoin Left keys [1]: [ps_suppkey#X] Right keys [1]: [s_suppkey#X] Join type: Inner Join condition: None -(64) Project +(66) Project Output [4]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] Input [6]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X, s_suppkey#X, s_nationkey#X] -(65) Exchange +(67) Exchange Input [4]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(66) Scan parquet +(68) Sort +Input [4]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(69) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,GERMANY), IsNotNull(n_nationkey)] ReadSchema: struct -(67) Filter +(70) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = GERMANY)) AND isnotnull(n_nationkey#X)) -(68) Project +(71) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(69) Exchange +(72) Exchange Input [1]: [n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(70) ShuffledHashJoin +(73) Sort +Input [1]: [n_nationkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(74) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join type: Inner Join condition: None -(71) Project +(75) Project Output [3]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X] Input [5]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X, n_nationkey#X] -(72) HashAggregate +(76) HashAggregate Input [3]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X] Keys [1]: [ps_partkey#X] Functions [1]: [partial_sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [ps_partkey#X, sum#X, isEmpty#X] -(73) Exchange +(77) Exchange Input [3]: [ps_partkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(ps_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(74) HashAggregate +(78) HashAggregate Input [3]: [ps_partkey#X, sum#X, isEmpty#X] Keys [1]: [ps_partkey#X] Functions [1]: [sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))] Aggregate Attributes [1]: [sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))#X] Results [2]: [ps_partkey#X, sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))#X AS value#X] -(75) Filter +(79) Filter Input [2]: [ps_partkey#X, value#X] Condition : (isnotnull(value#X) AND (cast(value#X as decimal(38,6)) > Subquery subquery#X, [id=#X])) -(76) Exchange +(80) Exchange Input [2]: [ps_partkey#X, value#X] Arguments: rangepartitioning(value#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(77) Sort +(81) Sort Input [2]: [ps_partkey#X, value#X] Arguments: [value#X DESC NULLS LAST], true, 0 -(78) AdaptiveSparkPlan +(82) AdaptiveSparkPlan Output [2]: [ps_partkey#X, value#X] Arguments: isFinalPlan=true ===== Subqueries ===== Subquery:1 Hosting operator id = 47 Hosting Expression = Subquery subquery#X, [id=#X] -AdaptiveSparkPlan (128) +AdaptiveSparkPlan (136) +- == Final Plan == - VeloxColumnarToRowExec (110) - +- ^ ProjectExecTransformer (108) - +- ^ RegularHashAggregateExecTransformer (107) - +- ^ RegularHashAggregateExecTransformer (106) - +- ^ ProjectExecTransformer (105) - +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (104) - :- ^ InputIteratorTransformer (99) - : +- ShuffleQueryStage (97), Statistics(X) - : +- ColumnarExchange (96) - : +- VeloxAppendBatches (95) - : +- ^ ProjectExecTransformer (93) - : +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (92) - : :- ^ InputIteratorTransformer (87) - : : +- ShuffleQueryStage (85), Statistics(X) - : : +- ColumnarExchange (84) - : : +- VeloxAppendBatches (83) - : : +- ^ ProjectExecTransformer (81) - : : +- ^ NoopFilter (80) - : : +- ^ Scan parquet (79) - : +- ^ InputIteratorTransformer (91) - : +- ShuffleQueryStage (89), Statistics(X) - : +- ReusedExchange (88) - +- ^ InputIteratorTransformer (103) - +- ShuffleQueryStage (101), Statistics(X) - +- ReusedExchange (100) + VeloxColumnarToRowExec (114) + +- ^ ProjectExecTransformer (112) + +- ^ RegularHashAggregateExecTransformer (111) + +- ^ RegularHashAggregateExecTransformer (110) + +- ^ ProjectExecTransformer (109) + +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (108) + :- ^ InputIteratorTransformer (103) + : +- ShuffleQueryStage (101), Statistics(X) + : +- ColumnarExchange (100) + : +- VeloxAppendBatches (99) + : +- ^ ProjectExecTransformer (97) + : +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (96) + : :- ^ InputIteratorTransformer (91) + : : +- ShuffleQueryStage (89), Statistics(X) + : : +- ColumnarExchange (88) + : : +- VeloxAppendBatches (87) + : : +- ^ ProjectExecTransformer (85) + : : +- ^ NoopFilter (84) + : : +- ^ Scan parquet (83) + : +- ^ InputIteratorTransformer (95) + : +- ShuffleQueryStage (93), Statistics(X) + : +- ReusedExchange (92) + +- ^ InputIteratorTransformer (107) + +- ShuffleQueryStage (105), Statistics(X) + +- ReusedExchange (104) +- == Initial Plan == - HashAggregate (127) - +- HashAggregate (126) - +- Project (125) - +- ShuffledHashJoin Inner BuildRight (124) - :- Exchange (119) - : +- Project (118) - : +- ShuffledHashJoin Inner BuildRight (117) - : :- Exchange (113) - : : +- Filter (112) - : : +- Scan parquet (111) - : +- Exchange (116) - : +- Filter (115) - : +- Scan parquet (114) - +- Exchange (123) - +- Project (122) - +- Filter (121) - +- Scan parquet (120) - - -(79) Scan parquet + HashAggregate (135) + +- HashAggregate (134) + +- Project (133) + +- SortMergeJoin Inner (132) + :- Sort (126) + : +- Exchange (125) + : +- Project (124) + : +- SortMergeJoin Inner (123) + : :- Sort (118) + : : +- Exchange (117) + : : +- Filter (116) + : : +- Scan parquet (115) + : +- Sort (122) + : +- Exchange (121) + : +- Filter (120) + : +- Scan parquet (119) + +- Sort (131) + +- Exchange (130) + +- Project (129) + +- Filter (128) + +- Scan parquet (127) + + +(83) Scan parquet Output [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(ps_suppkey)] ReadSchema: struct -(80) NoopFilter +(84) NoopFilter Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] -(81) ProjectExecTransformer +(85) ProjectExecTransformer Output [4]: [hash(ps_suppkey#X, 42) AS hash_partition_key#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] -(82) WholeStageCodegenTransformer (X) +(86) WholeStageCodegenTransformer (X) Input [4]: [hash_partition_key#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: false -(83) VeloxAppendBatches +(87) VeloxAppendBatches Input [4]: [hash_partition_key#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: X -(84) ColumnarExchange +(88) ColumnarExchange Input [4]: [hash_partition_key#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [ps_suppkey#X, ps_availqty#X, ps_supplycost#X], [plan_id=X], [id=#X] -(85) ShuffleQueryStage +(89) ShuffleQueryStage Output [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: X -(86) InputAdapter +(90) InputAdapter Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] -(87) InputIteratorTransformer +(91) InputIteratorTransformer Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] -(88) ReusedExchange [Reuses operator id: 15] +(92) ReusedExchange [Reuses operator id: 15] Output [2]: [s_suppkey#X, s_nationkey#X] -(89) ShuffleQueryStage +(93) ShuffleQueryStage Output [2]: [s_suppkey#X, s_nationkey#X] Arguments: X -(90) InputAdapter +(94) InputAdapter Input [2]: [s_suppkey#X, s_nationkey#X] -(91) InputIteratorTransformer +(95) InputIteratorTransformer Input [2]: [s_suppkey#X, s_nationkey#X] -(92) ShuffledHashJoinExecTransformer +(96) ShuffledHashJoinExecTransformer Left keys [1]: [ps_suppkey#X] Right keys [1]: [s_suppkey#X] Join type: Inner Join condition: None -(93) ProjectExecTransformer +(97) ProjectExecTransformer Output [4]: [hash(s_nationkey#X, 42) AS hash_partition_key#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] Input [5]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X, s_suppkey#X, s_nationkey#X] -(94) WholeStageCodegenTransformer (X) +(98) WholeStageCodegenTransformer (X) Input [4]: [hash_partition_key#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] Arguments: false -(95) VeloxAppendBatches +(99) VeloxAppendBatches Input [4]: [hash_partition_key#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] Arguments: X -(96) ColumnarExchange +(100) ColumnarExchange Input [4]: [hash_partition_key#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [ps_availqty#X, ps_supplycost#X, s_nationkey#X], [plan_id=X], [id=#X] -(97) ShuffleQueryStage +(101) ShuffleQueryStage Output [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] Arguments: X -(98) InputAdapter +(102) InputAdapter Input [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] -(99) InputIteratorTransformer +(103) InputIteratorTransformer Input [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] -(100) ReusedExchange [Reuses operator id: 32] +(104) ReusedExchange [Reuses operator id: 32] Output [1]: [n_nationkey#X] -(101) ShuffleQueryStage +(105) ShuffleQueryStage Output [1]: [n_nationkey#X] Arguments: X -(102) InputAdapter +(106) InputAdapter Input [1]: [n_nationkey#X] -(103) InputIteratorTransformer +(107) InputIteratorTransformer Input [1]: [n_nationkey#X] -(104) ShuffledHashJoinExecTransformer +(108) ShuffledHashJoinExecTransformer Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join type: Inner Join condition: None -(105) ProjectExecTransformer +(109) ProjectExecTransformer Output [3]: [ps_availqty#X, ps_supplycost#X, (ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))) AS _pre_X#X] Input [4]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X, n_nationkey#X] -(106) RegularHashAggregateExecTransformer +(110) RegularHashAggregateExecTransformer Input [3]: [ps_availqty#X, ps_supplycost#X, _pre_X#X] Keys: [] Functions [1]: [partial_sum(_pre_X#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [2]: [sum#X, isEmpty#X] -(107) RegularHashAggregateExecTransformer +(111) RegularHashAggregateExecTransformer Input [2]: [sum#X, isEmpty#X] Keys: [] Functions [1]: [sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))] Aggregate Attributes [1]: [sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))#X] Results [1]: [sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))#X] -(108) ProjectExecTransformer +(112) ProjectExecTransformer Output [1]: [(sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))#X * 0.0001000000) AS (sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] Input [1]: [sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))#X] -(109) WholeStageCodegenTransformer (X) +(113) WholeStageCodegenTransformer (X) Input [1]: [(sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] Arguments: false -(110) VeloxColumnarToRowExec +(114) VeloxColumnarToRowExec Input [1]: [(sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] -(111) Scan parquet +(115) Scan parquet Output [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(ps_suppkey)] ReadSchema: struct -(112) Filter +(116) Filter Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Condition : isnotnull(ps_suppkey#X) -(113) Exchange +(117) Exchange Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(114) Scan parquet +(118) Sort +Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] +Arguments: [ps_suppkey#X ASC NULLS FIRST], false, 0 + +(119) Scan parquet Output [2]: [s_suppkey#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct -(115) Filter +(120) Filter Input [2]: [s_suppkey#X, s_nationkey#X] Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) -(116) Exchange +(121) Exchange Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(117) ShuffledHashJoin +(122) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(123) SortMergeJoin Left keys [1]: [ps_suppkey#X] Right keys [1]: [s_suppkey#X] Join type: Inner Join condition: None -(118) Project +(124) Project Output [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] Input [5]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X, s_suppkey#X, s_nationkey#X] -(119) Exchange +(125) Exchange Input [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(120) Scan parquet +(126) Sort +Input [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(127) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,GERMANY), IsNotNull(n_nationkey)] ReadSchema: struct -(121) Filter +(128) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = GERMANY)) AND isnotnull(n_nationkey#X)) -(122) Project +(129) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(123) Exchange +(130) Exchange Input [1]: [n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(124) ShuffledHashJoin +(131) Sort +Input [1]: [n_nationkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(132) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join type: Inner Join condition: None -(125) Project +(133) Project Output [2]: [ps_availqty#X, ps_supplycost#X] Input [4]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X, n_nationkey#X] -(126) HashAggregate +(134) HashAggregate Input [2]: [ps_availqty#X, ps_supplycost#X] Keys: [] Functions [1]: [partial_sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [2]: [sum#X, isEmpty#X] -(127) HashAggregate +(135) HashAggregate Input [2]: [sum#X, isEmpty#X] Keys: [] Functions [1]: [sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))] Aggregate Attributes [1]: [sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))#X] Results [1]: [(sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))#X * 0.0001000000) AS (sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] -(128) AdaptiveSparkPlan +(136) AdaptiveSparkPlan Output [1]: [(sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/12.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/12.txt index 9995164f4c49..8f963c49aba8 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/12.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/12.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (53) +AdaptiveSparkPlan (55) +- == Final Plan == VeloxColumnarToRowExec (38) +- ^ SortExecTransformer (36) @@ -31,20 +31,22 @@ AdaptiveSparkPlan (53) +- ^ NoopFilter (11) +- ^ Scan parquet (10) +- == Initial Plan == - Sort (52) - +- Exchange (51) - +- HashAggregate (50) - +- Exchange (49) - +- HashAggregate (48) - +- Project (47) - +- ShuffledHashJoin Inner BuildLeft (46) - :- Exchange (41) - : +- Filter (40) - : +- Scan parquet (39) - +- Exchange (45) - +- Project (44) - +- Filter (43) - +- Scan parquet (42) + Sort (54) + +- Exchange (53) + +- HashAggregate (52) + +- Exchange (51) + +- HashAggregate (50) + +- Project (49) + +- SortMergeJoin Inner (48) + :- Sort (42) + : +- Exchange (41) + : +- Filter (40) + : +- Scan parquet (39) + +- Sort (47) + +- Exchange (46) + +- Project (45) + +- Filter (44) + +- Scan parquet (43) (1) Scan parquet @@ -219,61 +221,69 @@ Condition : isnotnull(o_orderkey#X) Input [2]: [o_orderkey#X, o_orderpriority#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(42) Scan parquet +(42) Sort +Input [2]: [o_orderkey#X, o_orderpriority#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(43) Scan parquet Output [5]: [l_orderkey#X, l_shipdate#X, l_commitdate#X, l_receiptdate#X, l_shipmode#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_commitdate), IsNotNull(l_receiptdate), IsNotNull(l_shipdate), In(l_shipmode, [MAIL,SHIP]), GreaterThanOrEqual(l_receiptdate,1994-01-01), LessThan(l_receiptdate,1995-01-01), IsNotNull(l_orderkey)] ReadSchema: struct -(43) Filter +(44) Filter Input [5]: [l_orderkey#X, l_shipdate#X, l_commitdate#X, l_receiptdate#X, l_shipmode#X] Condition : ((((((((isnotnull(l_commitdate#X) AND isnotnull(l_receiptdate#X)) AND isnotnull(l_shipdate#X)) AND l_shipmode#X IN (MAIL,SHIP)) AND (l_commitdate#X < l_receiptdate#X)) AND (l_shipdate#X < l_commitdate#X)) AND (l_receiptdate#X >= 1994-01-01)) AND (l_receiptdate#X < 1995-01-01)) AND isnotnull(l_orderkey#X)) -(44) Project +(45) Project Output [2]: [l_orderkey#X, l_shipmode#X] Input [5]: [l_orderkey#X, l_shipdate#X, l_commitdate#X, l_receiptdate#X, l_shipmode#X] -(45) Exchange +(46) Exchange Input [2]: [l_orderkey#X, l_shipmode#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(46) ShuffledHashJoin +(47) Sort +Input [2]: [l_orderkey#X, l_shipmode#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(48) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join type: Inner Join condition: None -(47) Project +(49) Project Output [2]: [o_orderpriority#X, l_shipmode#X] Input [4]: [o_orderkey#X, o_orderpriority#X, l_orderkey#X, l_shipmode#X] -(48) HashAggregate +(50) HashAggregate Input [2]: [o_orderpriority#X, l_shipmode#X] Keys [1]: [l_shipmode#X] Functions [2]: [partial_sum(CASE WHEN ((o_orderpriority#X = 1-URGENT) OR (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (NOT (o_orderpriority#X = 1-URGENT) AND NOT (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)] Aggregate Attributes [2]: [sum#X, sum#X] Results [3]: [l_shipmode#X, sum#X, sum#X] -(49) Exchange +(51) Exchange Input [3]: [l_shipmode#X, sum#X, sum#X] Arguments: hashpartitioning(l_shipmode#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(50) HashAggregate +(52) HashAggregate Input [3]: [l_shipmode#X, sum#X, sum#X] Keys [1]: [l_shipmode#X] Functions [2]: [sum(CASE WHEN ((o_orderpriority#X = 1-URGENT) OR (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END), sum(CASE WHEN (NOT (o_orderpriority#X = 1-URGENT) AND NOT (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)] Aggregate Attributes [2]: [sum(CASE WHEN ((o_orderpriority#X = 1-URGENT) OR (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)#X, sum(CASE WHEN (NOT (o_orderpriority#X = 1-URGENT) AND NOT (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)#X] Results [3]: [l_shipmode#X, sum(CASE WHEN ((o_orderpriority#X = 1-URGENT) OR (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)#X AS high_line_count#X, sum(CASE WHEN (NOT (o_orderpriority#X = 1-URGENT) AND NOT (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)#X AS low_line_count#X] -(51) Exchange +(53) Exchange Input [3]: [l_shipmode#X, high_line_count#X, low_line_count#X] Arguments: rangepartitioning(l_shipmode#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(52) Sort +(54) Sort Input [3]: [l_shipmode#X, high_line_count#X, low_line_count#X] Arguments: [l_shipmode#X ASC NULLS FIRST], true, 0 -(53) AdaptiveSparkPlan +(55) AdaptiveSparkPlan Output [3]: [l_shipmode#X, high_line_count#X, low_line_count#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/13.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/13.txt index 53801198fb49..9584f92628cd 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/13.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/13.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (56) +AdaptiveSparkPlan (58) +- == Final Plan == VeloxColumnarToRowExec (40) +- ^ SortExecTransformer (38) @@ -33,21 +33,23 @@ AdaptiveSparkPlan (56) +- ^ NoopFilter (10) +- ^ Scan parquet (9) +- == Initial Plan == - Sort (55) - +- Exchange (54) - +- HashAggregate (53) - +- Exchange (52) - +- HashAggregate (51) - +- HashAggregate (50) - +- HashAggregate (49) - +- Project (48) - +- ShuffledHashJoin LeftOuter BuildRight (47) - :- Exchange (42) - : +- Scan parquet (41) - +- Exchange (46) - +- Project (45) - +- Filter (44) - +- Scan parquet (43) + Sort (57) + +- Exchange (56) + +- HashAggregate (55) + +- Exchange (54) + +- HashAggregate (53) + +- HashAggregate (52) + +- HashAggregate (51) + +- Project (50) + +- SortMergeJoin LeftOuter (49) + :- Sort (43) + : +- Exchange (42) + : +- Scan parquet (41) + +- Sort (48) + +- Exchange (47) + +- Project (46) + +- Filter (45) + +- Scan parquet (44) (1) Scan parquet @@ -230,75 +232,83 @@ ReadSchema: struct Input [1]: [c_custkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(43) Scan parquet +(43) Sort +Input [1]: [c_custkey#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(44) Scan parquet Output [3]: [o_orderkey#X, o_custkey#X, o_comment#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_comment), IsNotNull(o_custkey)] ReadSchema: struct -(44) Filter +(45) Filter Input [3]: [o_orderkey#X, o_custkey#X, o_comment#X] Condition : ((isnotnull(o_comment#X) AND NOT o_comment#X LIKE %special%requests%) AND isnotnull(o_custkey#X)) -(45) Project +(46) Project Output [2]: [o_orderkey#X, o_custkey#X] Input [3]: [o_orderkey#X, o_custkey#X, o_comment#X] -(46) Exchange +(47) Exchange Input [2]: [o_orderkey#X, o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(47) ShuffledHashJoin +(48) Sort +Input [2]: [o_orderkey#X, o_custkey#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(49) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join type: LeftOuter Join condition: None -(48) Project +(50) Project Output [2]: [c_custkey#X, o_orderkey#X] Input [3]: [c_custkey#X, o_orderkey#X, o_custkey#X] -(49) HashAggregate +(51) HashAggregate Input [2]: [c_custkey#X, o_orderkey#X] Keys [1]: [c_custkey#X] Functions [1]: [partial_count(o_orderkey#X)] Aggregate Attributes [1]: [count#X] Results [2]: [c_custkey#X, count#X] -(50) HashAggregate +(52) HashAggregate Input [2]: [c_custkey#X, count#X] Keys [1]: [c_custkey#X] Functions [1]: [count(o_orderkey#X)] Aggregate Attributes [1]: [count(o_orderkey#X)#X] Results [1]: [count(o_orderkey#X)#X AS c_count#X] -(51) HashAggregate +(53) HashAggregate Input [1]: [c_count#X] Keys [1]: [c_count#X] Functions [1]: [partial_count(1)] Aggregate Attributes [1]: [count#X] Results [2]: [c_count#X, count#X] -(52) Exchange +(54) Exchange Input [2]: [c_count#X, count#X] Arguments: hashpartitioning(c_count#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(53) HashAggregate +(55) HashAggregate Input [2]: [c_count#X, count#X] Keys [1]: [c_count#X] Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#X] Results [2]: [c_count#X, count(1)#X AS custdist#X] -(54) Exchange +(56) Exchange Input [2]: [c_count#X, custdist#X] Arguments: rangepartitioning(custdist#X DESC NULLS LAST, c_count#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(55) Sort +(57) Sort Input [2]: [c_count#X, custdist#X] Arguments: [custdist#X DESC NULLS LAST, c_count#X DESC NULLS LAST], true, 0 -(56) AdaptiveSparkPlan +(58) AdaptiveSparkPlan Output [2]: [c_count#X, custdist#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/14.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/14.txt index 4080a469e711..175a0e5a97cc 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/14.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/14.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (37) +AdaptiveSparkPlan (39) +- == Final Plan == VeloxColumnarToRowExec (25) +- ^ ProjectExecTransformer (23) @@ -22,17 +22,19 @@ AdaptiveSparkPlan (37) +- ^ NoopFilter (11) +- ^ Scan parquet (10) +- == Initial Plan == - HashAggregate (36) - +- HashAggregate (35) - +- Project (34) - +- ShuffledHashJoin Inner BuildRight (33) - :- Exchange (29) - : +- Project (28) - : +- Filter (27) - : +- Scan parquet (26) - +- Exchange (32) - +- Filter (31) - +- Scan parquet (30) + HashAggregate (38) + +- HashAggregate (37) + +- Project (36) + +- SortMergeJoin Inner (35) + :- Sort (30) + : +- Exchange (29) + : +- Project (28) + : +- Filter (27) + : +- Scan parquet (26) + +- Sort (34) + +- Exchange (33) + +- Filter (32) + +- Scan parquet (31) (1) Scan parquet @@ -163,45 +165,53 @@ Input [4]: [l_partkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Input [3]: [l_partkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(30) Scan parquet +(30) Sort +Input [3]: [l_partkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(31) Scan parquet Output [2]: [p_partkey#X, p_type#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_partkey)] ReadSchema: struct -(31) Filter +(32) Filter Input [2]: [p_partkey#X, p_type#X] Condition : isnotnull(p_partkey#X) -(32) Exchange +(33) Exchange Input [2]: [p_partkey#X, p_type#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(33) ShuffledHashJoin +(34) Sort +Input [2]: [p_partkey#X, p_type#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(35) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join type: Inner Join condition: None -(34) Project +(36) Project Output [3]: [l_extendedprice#X, l_discount#X, p_type#X] Input [5]: [l_partkey#X, l_extendedprice#X, l_discount#X, p_partkey#X, p_type#X] -(35) HashAggregate +(37) HashAggregate Input [3]: [l_extendedprice#X, l_discount#X, p_type#X] Keys: [] Functions [2]: [partial_sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN (l_extendedprice#X * (1 - l_discount#X)) ELSE 0.0000 END), partial_sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] Results [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] -(36) HashAggregate +(38) HashAggregate Input [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] Keys: [] Functions [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN (l_extendedprice#X * (1 - l_discount#X)) ELSE 0.0000 END), sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN (l_extendedprice#X * (1 - l_discount#X)) ELSE 0.0000 END)#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X] Results [1]: [((100.00 * sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN (l_extendedprice#X * (1 - l_discount#X)) ELSE 0.0000 END)#X) / sum((l_extendedprice#X * (1 - l_discount#X)))#X) AS promo_revenue#X] -(37) AdaptiveSparkPlan +(39) AdaptiveSparkPlan Output [1]: [promo_revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/15.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/15.txt index a177fe4bcaec..130d9036b4a2 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/15.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/15.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (45) +AdaptiveSparkPlan (47) +- == Final Plan == VeloxColumnarToRowExec (30) +- AQEShuffleRead (29) @@ -27,20 +27,22 @@ AdaptiveSparkPlan (45) +- ^ NoopFilter (11) +- ^ Scan parquet (10) +- == Initial Plan == - Sort (44) - +- Exchange (43) - +- Project (42) - +- ShuffledHashJoin Inner BuildLeft (41) - :- Exchange (33) - : +- Filter (32) - : +- Scan parquet (31) - +- Filter (40) - +- HashAggregate (39) - +- Exchange (38) - +- HashAggregate (37) - +- Project (36) - +- Filter (35) - +- Scan parquet (34) + Sort (46) + +- Exchange (45) + +- Project (44) + +- SortMergeJoin Inner (43) + :- Sort (34) + : +- Exchange (33) + : +- Filter (32) + : +- Scan parquet (31) + +- Sort (42) + +- Filter (41) + +- HashAggregate (40) + +- Exchange (39) + +- HashAggregate (38) + +- Project (37) + +- Filter (36) + +- Scan parquet (35) (1) Scan parquet @@ -187,222 +189,230 @@ Condition : isnotnull(s_suppkey#X) Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(34) Scan parquet +(34) Sort +Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(35) Scan parquet Output [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1996-01-01), LessThan(l_shipdate,1996-04-01), IsNotNull(l_suppkey)] ReadSchema: struct -(35) Filter +(36) Filter Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : (((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1996-01-01)) AND (l_shipdate#X < 1996-04-01)) AND isnotnull(l_suppkey#X)) -(36) Project +(37) Project Output [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(37) HashAggregate +(38) HashAggregate Input [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] Keys [1]: [l_suppkey#X] Functions [1]: [partial_sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_suppkey#X, sum#X, isEmpty#X] -(38) Exchange +(39) Exchange Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(39) HashAggregate +(40) HashAggregate Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Keys [1]: [l_suppkey#X] Functions [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X] Results [2]: [l_suppkey#X AS supplier_no#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X AS total_revenue#X] -(40) Filter +(41) Filter Input [2]: [supplier_no#X, total_revenue#X] Condition : (isnotnull(total_revenue#X) AND (total_revenue#X = Subquery subquery#X, [id=#X])) -(41) ShuffledHashJoin +(42) Sort +Input [2]: [supplier_no#X, total_revenue#X] +Arguments: [supplier_no#X ASC NULLS FIRST], false, 0 + +(43) SortMergeJoin Left keys [1]: [s_suppkey#X] Right keys [1]: [supplier_no#X] Join type: Inner Join condition: None -(42) Project +(44) Project Output [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] Input [6]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, supplier_no#X, total_revenue#X] -(43) Exchange +(45) Exchange Input [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] Arguments: rangepartitioning(s_suppkey#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(44) Sort +(46) Sort Input [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] Arguments: [s_suppkey#X ASC NULLS FIRST], true, 0 -(45) AdaptiveSparkPlan +(47) AdaptiveSparkPlan Output [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] Arguments: isFinalPlan=true ===== Subqueries ===== Subquery:1 Hosting operator id = 22 Hosting Expression = Subquery subquery#X, [id=#X] -AdaptiveSparkPlan (71) +AdaptiveSparkPlan (73) +- == Final Plan == - VeloxColumnarToRowExec (62) - +- ^ RegularHashAggregateExecTransformer (60) - +- ^ RegularHashAggregateExecTransformer (59) - +- ^ ProjectExecTransformer (58) - +- ^ RegularHashAggregateExecTransformer (57) - +- ^ InputIteratorTransformer (56) - +- ShuffleQueryStage (54), Statistics(X) - +- ColumnarExchange (53) - +- VeloxAppendBatches (52) - +- ^ ProjectExecTransformer (50) - +- ^ FlushableHashAggregateExecTransformer (49) - +- ^ ProjectExecTransformer (48) - +- ^ NoopFilter (47) - +- ^ Scan parquet (46) + VeloxColumnarToRowExec (64) + +- ^ RegularHashAggregateExecTransformer (62) + +- ^ RegularHashAggregateExecTransformer (61) + +- ^ ProjectExecTransformer (60) + +- ^ RegularHashAggregateExecTransformer (59) + +- ^ InputIteratorTransformer (58) + +- ShuffleQueryStage (56), Statistics(X) + +- ColumnarExchange (55) + +- VeloxAppendBatches (54) + +- ^ ProjectExecTransformer (52) + +- ^ FlushableHashAggregateExecTransformer (51) + +- ^ ProjectExecTransformer (50) + +- ^ NoopFilter (49) + +- ^ Scan parquet (48) +- == Initial Plan == - HashAggregate (70) - +- HashAggregate (69) - +- HashAggregate (68) - +- Exchange (67) - +- HashAggregate (66) - +- Project (65) - +- Filter (64) - +- Scan parquet (63) + HashAggregate (72) + +- HashAggregate (71) + +- HashAggregate (70) + +- Exchange (69) + +- HashAggregate (68) + +- Project (67) + +- Filter (66) + +- Scan parquet (65) -(46) Scan parquet +(48) Scan parquet Output [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1996-01-01), LessThan(l_shipdate,1996-04-01)] ReadSchema: struct -(47) NoopFilter +(49) NoopFilter Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Arguments: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(48) ProjectExecTransformer +(50) ProjectExecTransformer Output [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, (l_extendedprice#X * (1 - l_discount#X)) AS _pre_X#X] Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(49) FlushableHashAggregateExecTransformer +(51) FlushableHashAggregateExecTransformer Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, _pre_X#X] Keys [1]: [l_suppkey#X] Functions [1]: [partial_sum(_pre_X#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_suppkey#X, sum#X, isEmpty#X] -(50) ProjectExecTransformer +(52) ProjectExecTransformer Output [4]: [hash(l_suppkey#X, 42) AS hash_partition_key#X, l_suppkey#X, sum#X, isEmpty#X] Input [3]: [l_suppkey#X, sum#X, isEmpty#X] -(51) WholeStageCodegenTransformer (X) +(53) WholeStageCodegenTransformer (X) Input [4]: [hash_partition_key#X, l_suppkey#X, sum#X, isEmpty#X] Arguments: false -(52) VeloxAppendBatches +(54) VeloxAppendBatches Input [4]: [hash_partition_key#X, l_suppkey#X, sum#X, isEmpty#X] Arguments: X -(53) ColumnarExchange +(55) ColumnarExchange Input [4]: [hash_partition_key#X, l_suppkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [l_suppkey#X, sum#X, isEmpty#X], [plan_id=X], [id=#X] -(54) ShuffleQueryStage +(56) ShuffleQueryStage Output [3]: [l_suppkey#X, sum#X, isEmpty#X] Arguments: X -(55) InputAdapter +(57) InputAdapter Input [3]: [l_suppkey#X, sum#X, isEmpty#X] -(56) InputIteratorTransformer +(58) InputIteratorTransformer Input [3]: [l_suppkey#X, sum#X, isEmpty#X] -(57) RegularHashAggregateExecTransformer +(59) RegularHashAggregateExecTransformer Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Keys [1]: [l_suppkey#X] Functions [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X] Results [2]: [l_suppkey#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X] -(58) ProjectExecTransformer +(60) ProjectExecTransformer Output [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X AS total_revenue#X] Input [2]: [l_suppkey#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X] -(59) RegularHashAggregateExecTransformer +(61) RegularHashAggregateExecTransformer Input [1]: [total_revenue#X] Keys: [] Functions [1]: [partial_max(total_revenue#X)] Aggregate Attributes [1]: [max#X] Results [1]: [max#X] -(60) RegularHashAggregateExecTransformer +(62) RegularHashAggregateExecTransformer Input [1]: [max#X] Keys: [] Functions [1]: [max(total_revenue#X)] Aggregate Attributes [1]: [max(total_revenue#X)#X] Results [1]: [max(total_revenue#X)#X AS max(total_revenue)#X] -(61) WholeStageCodegenTransformer (X) +(63) WholeStageCodegenTransformer (X) Input [1]: [max(total_revenue)#X] Arguments: false -(62) VeloxColumnarToRowExec +(64) VeloxColumnarToRowExec Input [1]: [max(total_revenue)#X] -(63) Scan parquet +(65) Scan parquet Output [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1996-01-01), LessThan(l_shipdate,1996-04-01)] ReadSchema: struct -(64) Filter +(66) Filter Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : ((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1996-01-01)) AND (l_shipdate#X < 1996-04-01)) -(65) Project +(67) Project Output [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(66) HashAggregate +(68) HashAggregate Input [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] Keys [1]: [l_suppkey#X] Functions [1]: [partial_sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_suppkey#X, sum#X, isEmpty#X] -(67) Exchange +(69) Exchange Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(68) HashAggregate +(70) HashAggregate Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Keys [1]: [l_suppkey#X] Functions [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X] Results [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X AS total_revenue#X] -(69) HashAggregate +(71) HashAggregate Input [1]: [total_revenue#X] Keys: [] Functions [1]: [partial_max(total_revenue#X)] Aggregate Attributes [1]: [max#X] Results [1]: [max#X] -(70) HashAggregate +(72) HashAggregate Input [1]: [max#X] Keys: [] Functions [1]: [max(total_revenue#X)] Aggregate Attributes [1]: [max(total_revenue#X)#X] Results [1]: [max(total_revenue#X)#X AS max(total_revenue)#X] -(71) AdaptiveSparkPlan +(73) AdaptiveSparkPlan Output [1]: [max(total_revenue)#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/16.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/16.txt index 89a647ffce45..32a24beb94b9 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/16.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/16.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (69) +AdaptiveSparkPlan (71) +- == Final Plan == VeloxColumnarToRowExec (47) +- ^ SortExecTransformer (45) @@ -38,27 +38,29 @@ AdaptiveSparkPlan (69) +- ^ NoopFilter (11) +- ^ Scan parquet (10) +- == Initial Plan == - Sort (68) - +- Exchange (67) - +- HashAggregate (66) - +- Exchange (65) - +- HashAggregate (64) - +- HashAggregate (63) - +- Exchange (62) - +- HashAggregate (61) - +- Project (60) - +- ShuffledHashJoin Inner BuildRight (59) - :- Exchange (55) - : +- BroadcastHashJoin LeftAnti BuildRight (54) - : :- Filter (49) - : : +- Scan parquet (48) - : +- BroadcastExchange (53) - : +- Project (52) - : +- Filter (51) - : +- Scan parquet (50) - +- Exchange (58) - +- Filter (57) - +- Scan parquet (56) + Sort (70) + +- Exchange (69) + +- HashAggregate (68) + +- Exchange (67) + +- HashAggregate (66) + +- HashAggregate (65) + +- Exchange (64) + +- HashAggregate (63) + +- Project (62) + +- SortMergeJoin Inner (61) + :- Sort (56) + : +- Exchange (55) + : +- BroadcastHashJoin LeftAnti BuildRight (54) + : :- Filter (49) + : : +- Scan parquet (48) + : +- BroadcastExchange (53) + : +- Project (52) + : +- Filter (51) + : +- Scan parquet (50) + +- Sort (60) + +- Exchange (59) + +- Filter (58) + +- Scan parquet (57) (1) Scan parquet @@ -298,75 +300,83 @@ Join condition: None Input [2]: [ps_partkey#X, ps_suppkey#X] Arguments: hashpartitioning(ps_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(56) Scan parquet +(56) Sort +Input [2]: [ps_partkey#X, ps_suppkey#X] +Arguments: [ps_partkey#X ASC NULLS FIRST], false, 0 + +(57) Scan parquet Output [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_brand), IsNotNull(p_type), Not(EqualTo(p_brand,Brand#X)), Not(StringStartsWith(p_type,MEDIUM POLISHED)), In(p_size, [14,19,23,3,36,45,49,9]), IsNotNull(p_partkey)] ReadSchema: struct -(57) Filter +(58) Filter Input [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] Condition : (((((isnotnull(p_brand#X) AND isnotnull(p_type#X)) AND NOT (p_brand#X = Brand#X)) AND NOT StartsWith(p_type#X, MEDIUM POLISHED)) AND p_size#X IN (49,14,23,45,19,3,36,9)) AND isnotnull(p_partkey#X)) -(58) Exchange +(59) Exchange Input [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(59) ShuffledHashJoin +(60) Sort +Input [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(61) SortMergeJoin Left keys [1]: [ps_partkey#X] Right keys [1]: [p_partkey#X] Join type: Inner Join condition: None -(60) Project +(62) Project Output [4]: [ps_suppkey#X, p_brand#X, p_type#X, p_size#X] Input [6]: [ps_partkey#X, ps_suppkey#X, p_partkey#X, p_brand#X, p_type#X, p_size#X] -(61) HashAggregate +(63) HashAggregate Input [4]: [ps_suppkey#X, p_brand#X, p_type#X, p_size#X] Keys [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] Functions: [] Aggregate Attributes: [] Results [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] -(62) Exchange +(64) Exchange Input [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] Arguments: hashpartitioning(p_brand#X, p_type#X, p_size#X, ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(63) HashAggregate +(65) HashAggregate Input [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] Keys [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] Functions: [] Aggregate Attributes: [] Results [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] -(64) HashAggregate +(66) HashAggregate Input [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] Keys [3]: [p_brand#X, p_type#X, p_size#X] Functions [1]: [partial_count(distinct ps_suppkey#X)] Aggregate Attributes [1]: [count(ps_suppkey#X)#X] Results [4]: [p_brand#X, p_type#X, p_size#X, count#X] -(65) Exchange +(67) Exchange Input [4]: [p_brand#X, p_type#X, p_size#X, count#X] Arguments: hashpartitioning(p_brand#X, p_type#X, p_size#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(66) HashAggregate +(68) HashAggregate Input [4]: [p_brand#X, p_type#X, p_size#X, count#X] Keys [3]: [p_brand#X, p_type#X, p_size#X] Functions [1]: [count(distinct ps_suppkey#X)] Aggregate Attributes [1]: [count(ps_suppkey#X)#X] Results [4]: [p_brand#X, p_type#X, p_size#X, count(ps_suppkey#X)#X AS supplier_cnt#X] -(67) Exchange +(69) Exchange Input [4]: [p_brand#X, p_type#X, p_size#X, supplier_cnt#X] Arguments: rangepartitioning(supplier_cnt#X DESC NULLS LAST, p_brand#X ASC NULLS FIRST, p_type#X ASC NULLS FIRST, p_size#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(68) Sort +(70) Sort Input [4]: [p_brand#X, p_type#X, p_size#X, supplier_cnt#X] Arguments: [supplier_cnt#X DESC NULLS LAST, p_brand#X ASC NULLS FIRST, p_type#X ASC NULLS FIRST, p_size#X ASC NULLS FIRST], true, 0 -(69) AdaptiveSparkPlan +(71) AdaptiveSparkPlan Output [4]: [p_brand#X, p_type#X, p_size#X, supplier_cnt#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/17.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/17.txt index 42fc32b0bce1..59baa2d7a08c 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/17.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/17.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (60) +AdaptiveSparkPlan (63) +- == Final Plan == VeloxColumnarToRowExec (40) +- ^ ProjectExecTransformer (38) @@ -35,25 +35,28 @@ AdaptiveSparkPlan (60) +- ^ NoopFilter (22) +- ^ Scan parquet (21) +- == Initial Plan == - HashAggregate (59) - +- HashAggregate (58) - +- Project (57) - +- ShuffledHashJoin Inner BuildRight (56) - :- Project (49) - : +- ShuffledHashJoin Inner BuildRight (48) - : :- Exchange (43) - : : +- Filter (42) - : : +- Scan parquet (41) - : +- Exchange (47) - : +- Project (46) - : +- Filter (45) - : +- Scan parquet (44) - +- Filter (55) - +- HashAggregate (54) - +- Exchange (53) - +- HashAggregate (52) - +- Filter (51) - +- Scan parquet (50) + HashAggregate (62) + +- HashAggregate (61) + +- Project (60) + +- SortMergeJoin Inner (59) + :- Project (51) + : +- SortMergeJoin Inner (50) + : :- Sort (44) + : : +- Exchange (43) + : : +- Filter (42) + : : +- Scan parquet (41) + : +- Sort (49) + : +- Exchange (48) + : +- Project (47) + : +- Filter (46) + : +- Scan parquet (45) + +- Sort (58) + +- Filter (57) + +- HashAggregate (56) + +- Exchange (55) + +- HashAggregate (54) + +- Filter (53) + +- Scan parquet (52) (1) Scan parquet @@ -249,92 +252,104 @@ Condition : (isnotnull(l_partkey#X) AND isnotnull(l_quantity#X)) Input [3]: [l_partkey#X, l_quantity#X, l_extendedprice#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(44) Scan parquet +(44) Sort +Input [3]: [l_partkey#X, l_quantity#X, l_extendedprice#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(45) Scan parquet Output [3]: [p_partkey#X, p_brand#X, p_container#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_brand), IsNotNull(p_container), EqualTo(p_brand,Brand#X), EqualTo(p_container,MED BOX), IsNotNull(p_partkey)] ReadSchema: struct -(45) Filter +(46) Filter Input [3]: [p_partkey#X, p_brand#X, p_container#X] Condition : ((((isnotnull(p_brand#X) AND isnotnull(p_container#X)) AND (p_brand#X = Brand#X)) AND (p_container#X = MED BOX)) AND isnotnull(p_partkey#X)) -(46) Project +(47) Project Output [1]: [p_partkey#X] Input [3]: [p_partkey#X, p_brand#X, p_container#X] -(47) Exchange +(48) Exchange Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(48) ShuffledHashJoin +(49) Sort +Input [1]: [p_partkey#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(50) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join type: Inner Join condition: None -(49) Project +(51) Project Output [3]: [l_quantity#X, l_extendedprice#X, p_partkey#X] Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, p_partkey#X] -(50) Scan parquet +(52) Scan parquet Output [2]: [l_partkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_partkey)] ReadSchema: struct -(51) Filter +(53) Filter Input [2]: [l_partkey#X, l_quantity#X] Condition : isnotnull(l_partkey#X) -(52) HashAggregate +(54) HashAggregate Input [2]: [l_partkey#X, l_quantity#X] Keys [1]: [l_partkey#X] Functions [1]: [partial_avg(l_quantity#X)] Aggregate Attributes [2]: [sum#X, count#X] Results [3]: [l_partkey#X, sum#X, count#X] -(53) Exchange +(55) Exchange Input [3]: [l_partkey#X, sum#X, count#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(54) HashAggregate +(56) HashAggregate Input [3]: [l_partkey#X, sum#X, count#X] Keys [1]: [l_partkey#X] Functions [1]: [avg(l_quantity#X)] Aggregate Attributes [1]: [avg(l_quantity#X)#X] Results [2]: [(0.2 * avg(l_quantity#X)#X) AS (0.2 * avg(l_quantity))#X, l_partkey#X] -(55) Filter +(57) Filter Input [2]: [(0.2 * avg(l_quantity))#X, l_partkey#X] Condition : isnotnull((0.2 * avg(l_quantity))#X) -(56) ShuffledHashJoin +(58) Sort +Input [2]: [(0.2 * avg(l_quantity))#X, l_partkey#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(59) SortMergeJoin Left keys [1]: [p_partkey#X] Right keys [1]: [l_partkey#X] Join type: Inner Join condition: (cast(l_quantity#X as decimal(18,7)) < (0.2 * avg(l_quantity))#X) -(57) Project +(60) Project Output [1]: [l_extendedprice#X] Input [5]: [l_quantity#X, l_extendedprice#X, p_partkey#X, (0.2 * avg(l_quantity))#X, l_partkey#X] -(58) HashAggregate +(61) HashAggregate Input [1]: [l_extendedprice#X] Keys: [] Functions [1]: [partial_sum(l_extendedprice#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [2]: [sum#X, isEmpty#X] -(59) HashAggregate +(62) HashAggregate Input [2]: [sum#X, isEmpty#X] Keys: [] Functions [1]: [sum(l_extendedprice#X)] Aggregate Attributes [1]: [sum(l_extendedprice#X)#X] Results [1]: [(sum(l_extendedprice#X)#X / 7.0) AS avg_yearly#X] -(60) AdaptiveSparkPlan +(63) AdaptiveSparkPlan Output [1]: [avg_yearly#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/18.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/18.txt index ca913b2d4c84..c78e265e54d9 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/18.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/18.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (103) +AdaptiveSparkPlan (110) +- == Final Plan == VeloxColumnarToRowExec (70) +- TakeOrderedAndProjectExecTransformer (69) @@ -58,38 +58,45 @@ AdaptiveSparkPlan (103) +- ShuffleQueryStage (57), Statistics(X) +- ReusedExchange (56) +- == Initial Plan == - TakeOrderedAndProject (102) - +- HashAggregate (101) - +- HashAggregate (100) - +- Project (99) - +- ShuffledHashJoin Inner BuildRight (98) - :- Exchange (87) - : +- Project (86) - : +- ShuffledHashJoin Inner BuildLeft (85) - : :- Exchange (73) - : : +- Filter (72) - : : +- Scan parquet (71) - : +- Exchange (84) - : +- ShuffledHashJoin LeftSemi BuildRight (83) - : :- Exchange (76) - : : +- Filter (75) - : : +- Scan parquet (74) - : +- Project (82) - : +- Filter (81) - : +- HashAggregate (80) - : +- Exchange (79) - : +- HashAggregate (78) - : +- Scan parquet (77) - +- ShuffledHashJoin LeftSemi BuildRight (97) - :- Exchange (90) - : +- Filter (89) - : +- Scan parquet (88) - +- Project (96) - +- Filter (95) - +- HashAggregate (94) - +- Exchange (93) - +- HashAggregate (92) - +- Scan parquet (91) + TakeOrderedAndProject (109) + +- HashAggregate (108) + +- HashAggregate (107) + +- Project (106) + +- SortMergeJoin Inner (105) + :- Sort (92) + : +- Exchange (91) + : +- Project (90) + : +- SortMergeJoin Inner (89) + : :- Sort (74) + : : +- Exchange (73) + : : +- Filter (72) + : : +- Scan parquet (71) + : +- Sort (88) + : +- Exchange (87) + : +- SortMergeJoin LeftSemi (86) + : :- Sort (78) + : : +- Exchange (77) + : : +- Filter (76) + : : +- Scan parquet (75) + : +- Sort (85) + : +- Project (84) + : +- Filter (83) + : +- HashAggregate (82) + : +- Exchange (81) + : +- HashAggregate (80) + : +- Scan parquet (79) + +- SortMergeJoin LeftSemi (104) + :- Sort (96) + : +- Exchange (95) + : +- Filter (94) + : +- Scan parquet (93) + +- Sort (103) + +- Project (102) + +- Filter (101) + +- HashAggregate (100) + +- Exchange (99) + +- HashAggregate (98) + +- Scan parquet (97) (1) Scan parquet @@ -405,158 +412,186 @@ Condition : isnotnull(c_custkey#X) Input [2]: [c_custkey#X, c_name#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(74) Scan parquet +(74) Sort +Input [2]: [c_custkey#X, c_name#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(75) Scan parquet Output [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_custkey), IsNotNull(o_orderkey)] ReadSchema: struct -(75) Filter +(76) Filter Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Condition : (isnotnull(o_custkey#X) AND isnotnull(o_orderkey#X)) -(76) Exchange +(77) Exchange Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(77) Scan parquet +(78) Sort +Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(79) Scan parquet Output [2]: [l_orderkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(78) HashAggregate +(80) HashAggregate Input [2]: [l_orderkey#X, l_quantity#X] Keys [1]: [l_orderkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_orderkey#X, sum#X, isEmpty#X] -(79) Exchange +(81) Exchange Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(80) HashAggregate +(82) HashAggregate Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Keys [1]: [l_orderkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [2]: [l_orderkey#X, sum(l_quantity#X)#X AS sum(l_quantity#X)#X] -(81) Filter +(83) Filter Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] Condition : (isnotnull(sum(l_quantity#X)#X) AND (sum(l_quantity#X)#X > 300.00)) -(82) Project +(84) Project Output [1]: [l_orderkey#X] Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] -(83) ShuffledHashJoin +(85) Sort +Input [1]: [l_orderkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(86) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join type: LeftSemi Join condition: None -(84) Exchange +(87) Exchange Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(85) ShuffledHashJoin +(88) Sort +Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(89) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join type: Inner Join condition: None -(86) Project +(90) Project Output [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] Input [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] -(87) Exchange +(91) Exchange Input [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(88) Scan parquet +(92) Sort +Input [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(93) Scan parquet Output [2]: [l_orderkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_orderkey)] ReadSchema: struct -(89) Filter +(94) Filter Input [2]: [l_orderkey#X, l_quantity#X] Condition : isnotnull(l_orderkey#X) -(90) Exchange +(95) Exchange Input [2]: [l_orderkey#X, l_quantity#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(91) Scan parquet +(96) Sort +Input [2]: [l_orderkey#X, l_quantity#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(97) Scan parquet Output [2]: [l_orderkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(92) HashAggregate +(98) HashAggregate Input [2]: [l_orderkey#X, l_quantity#X] Keys [1]: [l_orderkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_orderkey#X, sum#X, isEmpty#X] -(93) Exchange +(99) Exchange Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(94) HashAggregate +(100) HashAggregate Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Keys [1]: [l_orderkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [2]: [l_orderkey#X, sum(l_quantity#X)#X AS sum(l_quantity#X)#X] -(95) Filter +(101) Filter Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] Condition : (isnotnull(sum(l_quantity#X)#X) AND (sum(l_quantity#X)#X > 300.00)) -(96) Project +(102) Project Output [1]: [l_orderkey#X] Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] -(97) ShuffledHashJoin +(103) Sort +Input [1]: [l_orderkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(104) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [l_orderkey#X] Join type: LeftSemi Join condition: None -(98) ShuffledHashJoin +(105) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join type: Inner Join condition: None -(99) Project +(106) Project Output [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_quantity#X] Input [7]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_orderkey#X, l_quantity#X] -(100) HashAggregate +(107) HashAggregate Input [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_quantity#X] Keys [5]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [7]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] -(101) HashAggregate +(108) HashAggregate Input [7]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] Keys [5]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity#X)#X AS sum(l_quantity)#X] -(102) TakeOrderedAndProject +(109) TakeOrderedAndProject Input [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] Arguments: X, [o_totalprice#X DESC NULLS LAST, o_orderdate#X ASC NULLS FIRST], [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] -(103) AdaptiveSparkPlan +(110) AdaptiveSparkPlan Output [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/19.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/19.txt index 91187ac8d5a7..a9c629524fb7 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/19.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/19.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (36) +AdaptiveSparkPlan (38) +- == Final Plan == VeloxColumnarToRowExec (24) +- ^ RegularHashAggregateExecTransformer (22) @@ -21,17 +21,19 @@ AdaptiveSparkPlan (36) +- ^ NoopFilter (11) +- ^ Scan parquet (10) +- == Initial Plan == - HashAggregate (35) - +- HashAggregate (34) - +- Project (33) - +- ShuffledHashJoin Inner BuildRight (32) - :- Exchange (28) - : +- Project (27) - : +- Filter (26) - : +- Scan parquet (25) - +- Exchange (31) - +- Filter (30) - +- Scan parquet (29) + HashAggregate (37) + +- HashAggregate (36) + +- Project (35) + +- SortMergeJoin Inner (34) + :- Sort (29) + : +- Exchange (28) + : +- Project (27) + : +- Filter (26) + : +- Scan parquet (25) + +- Sort (33) + +- Exchange (32) + +- Filter (31) + +- Scan parquet (30) (1) Scan parquet @@ -158,45 +160,53 @@ Input [6]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, l_shipin Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(29) Scan parquet +(29) Sort +Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(30) Scan parquet Output [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_size), GreaterThanOrEqual(p_size,1), IsNotNull(p_partkey), Or(Or(And(And(EqualTo(p_brand,Brand#X),In(p_container, [SM BOX,SM CASE,SM PACK,SM PKG])),LessThanOrEqual(p_size,5)),And(And(EqualTo(p_brand,Brand#X),In(p_container, [MED BAG,MED BOX,MED PACK,MED PKG])),LessThanOrEqual(p_size,10))),And(And(EqualTo(p_brand,Brand#X),In(p_container, [LG BOX,LG CASE,LG PACK,LG PKG])),LessThanOrEqual(p_size,15)))] ReadSchema: struct -(30) Filter +(31) Filter Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Condition : (((isnotnull(p_size#X) AND (p_size#X >= 1)) AND isnotnull(p_partkey#X)) AND (((((p_brand#X = Brand#X) AND p_container#X IN (SM CASE,SM BOX,SM PACK,SM PKG)) AND (p_size#X <= 5)) OR (((p_brand#X = Brand#X) AND p_container#X IN (MED BAG,MED BOX,MED PKG,MED PACK)) AND (p_size#X <= 10))) OR (((p_brand#X = Brand#X) AND p_container#X IN (LG CASE,LG BOX,LG PACK,LG PKG)) AND (p_size#X <= 15)))) -(31) Exchange +(32) Exchange Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(32) ShuffledHashJoin +(33) Sort +Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(34) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join type: Inner Join condition: (((((((p_brand#X = Brand#X) AND p_container#X IN (SM CASE,SM BOX,SM PACK,SM PKG)) AND (l_quantity#X >= 1.00)) AND (l_quantity#X <= 11.00)) AND (p_size#X <= 5)) OR (((((p_brand#X = Brand#X) AND p_container#X IN (MED BAG,MED BOX,MED PKG,MED PACK)) AND (l_quantity#X >= 10.00)) AND (l_quantity#X <= 20.00)) AND (p_size#X <= 10))) OR (((((p_brand#X = Brand#X) AND p_container#X IN (LG CASE,LG BOX,LG PACK,LG PKG)) AND (l_quantity#X >= 20.00)) AND (l_quantity#X <= 30.00)) AND (p_size#X <= 15))) -(33) Project +(35) Project Output [2]: [l_extendedprice#X, l_discount#X] Input [8]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, p_partkey#X, p_brand#X, p_size#X, p_container#X] -(34) HashAggregate +(36) HashAggregate Input [2]: [l_extendedprice#X, l_discount#X] Keys: [] Functions [1]: [partial_sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [2]: [sum#X, isEmpty#X] -(35) HashAggregate +(37) HashAggregate Input [2]: [sum#X, isEmpty#X] Keys: [] Functions [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X] Results [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X AS revenue#X] -(36) AdaptiveSparkPlan +(38) AdaptiveSparkPlan Output [1]: [revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/20.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/20.txt index 01ab88ee0b2b..8e929ff7b296 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/20.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/20.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (133) +AdaptiveSparkPlan (143) +- == Final Plan == VeloxColumnarToRowExec (93) +- AQEShuffleRead (92) @@ -75,45 +75,55 @@ AdaptiveSparkPlan (133) +- ^ NoopFilter (78) +- ^ Scan parquet (77) +- == Initial Plan == - Sort (132) - +- Exchange (131) - +- Project (130) - +- ShuffledHashJoin Inner BuildRight (129) - :- Exchange (124) - : +- Project (123) - : +- ShuffledHashJoin LeftSemi BuildRight (122) - : :- Exchange (96) - : : +- Filter (95) - : : +- Scan parquet (94) - : +- Exchange (121) - : +- Project (120) - : +- ShuffledHashJoin Inner BuildLeft (119) - : :- Exchange (105) - : : +- ShuffledHashJoin LeftSemi BuildRight (104) - : : :- Exchange (99) - : : : +- Filter (98) - : : : +- Scan parquet (97) - : : +- Exchange (103) - : : +- Project (102) - : : +- Filter (101) - : : +- Scan parquet (100) - : +- Exchange (118) - : +- Filter (117) - : +- HashAggregate (116) - : +- HashAggregate (115) - : +- ShuffledHashJoin LeftSemi BuildRight (114) - : :- Exchange (109) - : : +- Project (108) - : : +- Filter (107) - : : +- Scan parquet (106) - : +- Exchange (113) - : +- Project (112) - : +- Filter (111) - : +- Scan parquet (110) - +- Exchange (128) - +- Project (127) - +- Filter (126) - +- Scan parquet (125) + Sort (142) + +- Exchange (141) + +- Project (140) + +- SortMergeJoin Inner (139) + :- Sort (133) + : +- Exchange (132) + : +- Project (131) + : +- SortMergeJoin LeftSemi (130) + : :- Sort (97) + : : +- Exchange (96) + : : +- Filter (95) + : : +- Scan parquet (94) + : +- Sort (129) + : +- Exchange (128) + : +- Project (127) + : +- SortMergeJoin Inner (126) + : :- Sort (109) + : : +- Exchange (108) + : : +- SortMergeJoin LeftSemi (107) + : : :- Sort (101) + : : : +- Exchange (100) + : : : +- Filter (99) + : : : +- Scan parquet (98) + : : +- Sort (106) + : : +- Exchange (105) + : : +- Project (104) + : : +- Filter (103) + : : +- Scan parquet (102) + : +- Sort (125) + : +- Exchange (124) + : +- Filter (123) + : +- HashAggregate (122) + : +- HashAggregate (121) + : +- SortMergeJoin LeftSemi (120) + : :- Sort (114) + : : +- Exchange (113) + : : +- Project (112) + : : +- Filter (111) + : : +- Scan parquet (110) + : +- Sort (119) + : +- Exchange (118) + : +- Project (117) + : +- Filter (116) + : +- Scan parquet (115) + +- Sort (138) + +- Exchange (137) + +- Project (136) + +- Filter (135) + +- Scan parquet (134) (1) Scan parquet @@ -512,181 +522,221 @@ Condition : isnotnull(s_nationkey#X) Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(97) Scan parquet +(97) Sort +Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(98) Scan parquet Output [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(ps_availqty), IsNotNull(ps_partkey), IsNotNull(ps_suppkey)] ReadSchema: struct -(98) Filter +(99) Filter Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Condition : ((isnotnull(ps_availqty#X) AND isnotnull(ps_partkey#X)) AND isnotnull(ps_suppkey#X)) -(99) Exchange +(100) Exchange Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Arguments: hashpartitioning(ps_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(100) Scan parquet +(101) Sort +Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] +Arguments: [ps_partkey#X ASC NULLS FIRST], false, 0 + +(102) Scan parquet Output [2]: [p_partkey#X, p_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_name), StringStartsWith(p_name,forest)] ReadSchema: struct -(101) Filter +(103) Filter Input [2]: [p_partkey#X, p_name#X] Condition : (isnotnull(p_name#X) AND StartsWith(p_name#X, forest)) -(102) Project +(104) Project Output [1]: [p_partkey#X] Input [2]: [p_partkey#X, p_name#X] -(103) Exchange +(105) Exchange Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(104) ShuffledHashJoin +(106) Sort +Input [1]: [p_partkey#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(107) SortMergeJoin Left keys [1]: [ps_partkey#X] Right keys [1]: [p_partkey#X] Join type: LeftSemi Join condition: None -(105) Exchange +(108) Exchange Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Arguments: hashpartitioning(ps_partkey#X, ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(106) Scan parquet +(109) Sort +Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] +Arguments: [ps_partkey#X ASC NULLS FIRST, ps_suppkey#X ASC NULLS FIRST], false, 0 + +(110) Scan parquet Output [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1994-01-01), LessThan(l_shipdate,1995-01-01), IsNotNull(l_partkey), IsNotNull(l_suppkey)] ReadSchema: struct -(107) Filter +(111) Filter Input [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] Condition : ((((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1994-01-01)) AND (l_shipdate#X < 1995-01-01)) AND isnotnull(l_partkey#X)) AND isnotnull(l_suppkey#X)) -(108) Project +(112) Project Output [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Input [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] -(109) Exchange +(113) Exchange Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(110) Scan parquet +(114) Sort +Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(115) Scan parquet Output [2]: [p_partkey#X, p_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_name), StringStartsWith(p_name,forest)] ReadSchema: struct -(111) Filter +(116) Filter Input [2]: [p_partkey#X, p_name#X] Condition : (isnotnull(p_name#X) AND StartsWith(p_name#X, forest)) -(112) Project +(117) Project Output [1]: [p_partkey#X] Input [2]: [p_partkey#X, p_name#X] -(113) Exchange +(118) Exchange Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(114) ShuffledHashJoin +(119) Sort +Input [1]: [p_partkey#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(120) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join type: LeftSemi Join condition: None -(115) HashAggregate +(121) HashAggregate Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Keys [2]: [l_partkey#X, l_suppkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] -(116) HashAggregate +(122) HashAggregate Input [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] Keys [2]: [l_partkey#X, l_suppkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [3]: [(0.5 * sum(l_quantity#X)#X) AS (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(117) Filter +(123) Filter Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Condition : isnotnull((0.5 * sum(l_quantity))#X) -(118) Exchange +(124) Exchange Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Arguments: hashpartitioning(l_partkey#X, l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(119) ShuffledHashJoin +(125) Sort +Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] +Arguments: [l_partkey#X ASC NULLS FIRST, l_suppkey#X ASC NULLS FIRST], false, 0 + +(126) SortMergeJoin Left keys [2]: [ps_partkey#X, ps_suppkey#X] Right keys [2]: [l_partkey#X, l_suppkey#X] Join type: Inner Join condition: (cast(ps_availqty#X as decimal(24,3)) > (0.5 * sum(l_quantity))#X) -(120) Project +(127) Project Output [1]: [ps_suppkey#X] Input [6]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(121) Exchange +(128) Exchange Input [1]: [ps_suppkey#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(122) ShuffledHashJoin +(129) Sort +Input [1]: [ps_suppkey#X] +Arguments: [ps_suppkey#X ASC NULLS FIRST], false, 0 + +(130) SortMergeJoin Left keys [1]: [s_suppkey#X] Right keys [1]: [ps_suppkey#X] Join type: LeftSemi Join condition: None -(123) Project +(131) Project Output [3]: [s_name#X, s_address#X, s_nationkey#X] Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] -(124) Exchange +(132) Exchange Input [3]: [s_name#X, s_address#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(125) Scan parquet +(133) Sort +Input [3]: [s_name#X, s_address#X, s_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(134) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,CANADA), IsNotNull(n_nationkey)] ReadSchema: struct -(126) Filter +(135) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = CANADA)) AND isnotnull(n_nationkey#X)) -(127) Project +(136) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(128) Exchange +(137) Exchange Input [1]: [n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(129) ShuffledHashJoin +(138) Sort +Input [1]: [n_nationkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(139) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join type: Inner Join condition: None -(130) Project +(140) Project Output [2]: [s_name#X, s_address#X] Input [4]: [s_name#X, s_address#X, s_nationkey#X, n_nationkey#X] -(131) Exchange +(141) Exchange Input [2]: [s_name#X, s_address#X] Arguments: rangepartitioning(s_name#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(132) Sort +(142) Sort Input [2]: [s_name#X, s_address#X] Arguments: [s_name#X ASC NULLS FIRST], true, 0 -(133) AdaptiveSparkPlan +(143) AdaptiveSparkPlan Output [2]: [s_name#X, s_address#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/21.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/21.txt index 317740080d7b..279f4f096692 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/21.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/21.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (128) +AdaptiveSparkPlan (137) +- == Final Plan == VeloxColumnarToRowExec (91) +- ^ RegularHashAggregateExecTransformer (89) @@ -72,42 +72,51 @@ AdaptiveSparkPlan (128) +- ^ NoopFilter (71) +- ^ Scan parquet (70) +- == Initial Plan == - TakeOrderedAndProject (127) - +- HashAggregate (126) - +- Exchange (125) - +- HashAggregate (124) - +- Project (123) - +- ShuffledHashJoin Inner BuildRight (122) - :- Exchange (117) - : +- Project (116) - : +- ShuffledHashJoin Inner BuildRight (115) - : :- Exchange (110) - : : +- Project (109) - : : +- ShuffledHashJoin Inner BuildLeft (108) - : : :- Exchange (94) - : : : +- Filter (93) - : : : +- Scan parquet (92) - : : +- Exchange (107) - : : +- ShuffledHashJoin LeftAnti BuildRight (106) - : : :- ShuffledHashJoin LeftSemi BuildRight (101) - : : : :- Exchange (98) - : : : : +- Project (97) - : : : : +- Filter (96) - : : : : +- Scan parquet (95) - : : : +- Exchange (100) - : : : +- Scan parquet (99) - : : +- Exchange (105) - : : +- Project (104) - : : +- Filter (103) - : : +- Scan parquet (102) - : +- Exchange (114) - : +- Project (113) - : +- Filter (112) - : +- Scan parquet (111) - +- Exchange (121) - +- Project (120) - +- Filter (119) - +- Scan parquet (118) + TakeOrderedAndProject (136) + +- HashAggregate (135) + +- Exchange (134) + +- HashAggregate (133) + +- Project (132) + +- SortMergeJoin Inner (131) + :- Sort (125) + : +- Exchange (124) + : +- Project (123) + : +- SortMergeJoin Inner (122) + : :- Sort (116) + : : +- Exchange (115) + : : +- Project (114) + : : +- SortMergeJoin Inner (113) + : : :- Sort (95) + : : : +- Exchange (94) + : : : +- Filter (93) + : : : +- Scan parquet (92) + : : +- Sort (112) + : : +- Exchange (111) + : : +- SortMergeJoin LeftAnti (110) + : : :- SortMergeJoin LeftSemi (104) + : : : :- Sort (100) + : : : : +- Exchange (99) + : : : : +- Project (98) + : : : : +- Filter (97) + : : : : +- Scan parquet (96) + : : : +- Sort (103) + : : : +- Exchange (102) + : : : +- Scan parquet (101) + : : +- Sort (109) + : : +- Exchange (108) + : : +- Project (107) + : : +- Filter (106) + : : +- Scan parquet (105) + : +- Sort (121) + : +- Exchange (120) + : +- Project (119) + : +- Filter (118) + : +- Scan parquet (117) + +- Sort (130) + +- Exchange (129) + +- Project (128) + +- Filter (127) + +- Scan parquet (126) (1) Scan parquet @@ -501,168 +510,204 @@ Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) Input [3]: [s_suppkey#X, s_name#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(95) Scan parquet +(95) Sort +Input [3]: [s_suppkey#X, s_name#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(96) Scan parquet Output [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_receiptdate), IsNotNull(l_commitdate), IsNotNull(l_suppkey), IsNotNull(l_orderkey)] ReadSchema: struct -(96) Filter +(97) Filter Input [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] Condition : ((((isnotnull(l_receiptdate#X) AND isnotnull(l_commitdate#X)) AND (l_receiptdate#X > l_commitdate#X)) AND isnotnull(l_suppkey#X)) AND isnotnull(l_orderkey#X)) -(97) Project +(98) Project Output [2]: [l_orderkey#X, l_suppkey#X] Input [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] -(98) Exchange +(99) Exchange Input [2]: [l_orderkey#X, l_suppkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(99) Scan parquet +(100) Sort +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(101) Scan parquet Output [2]: [l_orderkey#X, l_suppkey#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(100) Exchange +(102) Exchange Input [2]: [l_orderkey#X, l_suppkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(101) ShuffledHashJoin +(103) Sort +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(104) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [l_orderkey#X] Join type: LeftSemi Join condition: NOT (l_suppkey#X = l_suppkey#X) -(102) Scan parquet +(105) Scan parquet Output [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_receiptdate), IsNotNull(l_commitdate)] ReadSchema: struct -(103) Filter +(106) Filter Input [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] Condition : ((isnotnull(l_receiptdate#X) AND isnotnull(l_commitdate#X)) AND (l_receiptdate#X > l_commitdate#X)) -(104) Project +(107) Project Output [2]: [l_orderkey#X, l_suppkey#X] Input [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] -(105) Exchange +(108) Exchange Input [2]: [l_orderkey#X, l_suppkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(106) ShuffledHashJoin +(109) Sort +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(110) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [l_orderkey#X] Join type: LeftAnti Join condition: NOT (l_suppkey#X = l_suppkey#X) -(107) Exchange +(111) Exchange Input [2]: [l_orderkey#X, l_suppkey#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(108) ShuffledHashJoin +(112) Sort +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: [l_suppkey#X ASC NULLS FIRST], false, 0 + +(113) SortMergeJoin Left keys [1]: [s_suppkey#X] Right keys [1]: [l_suppkey#X] Join type: Inner Join condition: None -(109) Project +(114) Project Output [3]: [s_name#X, s_nationkey#X, l_orderkey#X] Input [5]: [s_suppkey#X, s_name#X, s_nationkey#X, l_orderkey#X, l_suppkey#X] -(110) Exchange +(115) Exchange Input [3]: [s_name#X, s_nationkey#X, l_orderkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(111) Scan parquet +(116) Sort +Input [3]: [s_name#X, s_nationkey#X, l_orderkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(117) Scan parquet Output [2]: [o_orderkey#X, o_orderstatus#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderstatus), EqualTo(o_orderstatus,F), IsNotNull(o_orderkey)] ReadSchema: struct -(112) Filter +(118) Filter Input [2]: [o_orderkey#X, o_orderstatus#X] Condition : ((isnotnull(o_orderstatus#X) AND (o_orderstatus#X = F)) AND isnotnull(o_orderkey#X)) -(113) Project +(119) Project Output [1]: [o_orderkey#X] Input [2]: [o_orderkey#X, o_orderstatus#X] -(114) Exchange +(120) Exchange Input [1]: [o_orderkey#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(115) ShuffledHashJoin +(121) Sort +Input [1]: [o_orderkey#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(122) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [o_orderkey#X] Join type: Inner Join condition: None -(116) Project +(123) Project Output [2]: [s_name#X, s_nationkey#X] Input [4]: [s_name#X, s_nationkey#X, l_orderkey#X, o_orderkey#X] -(117) Exchange +(124) Exchange Input [2]: [s_name#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(118) Scan parquet +(125) Sort +Input [2]: [s_name#X, s_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(126) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,SAUDI ARABIA), IsNotNull(n_nationkey)] ReadSchema: struct -(119) Filter +(127) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = SAUDI ARABIA)) AND isnotnull(n_nationkey#X)) -(120) Project +(128) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(121) Exchange +(129) Exchange Input [1]: [n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(122) ShuffledHashJoin +(130) Sort +Input [1]: [n_nationkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(131) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join type: Inner Join condition: None -(123) Project +(132) Project Output [1]: [s_name#X] Input [3]: [s_name#X, s_nationkey#X, n_nationkey#X] -(124) HashAggregate +(133) HashAggregate Input [1]: [s_name#X] Keys [1]: [s_name#X] Functions [1]: [partial_count(1)] Aggregate Attributes [1]: [count#X] Results [2]: [s_name#X, count#X] -(125) Exchange +(134) Exchange Input [2]: [s_name#X, count#X] Arguments: hashpartitioning(s_name#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(126) HashAggregate +(135) HashAggregate Input [2]: [s_name#X, count#X] Keys [1]: [s_name#X] Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#X] Results [2]: [s_name#X, count(1)#X AS numwait#X] -(127) TakeOrderedAndProject +(136) TakeOrderedAndProject Input [2]: [s_name#X, numwait#X] Arguments: X, [numwait#X DESC NULLS LAST, s_name#X ASC NULLS FIRST], [s_name#X, numwait#X] -(128) AdaptiveSparkPlan +(137) AdaptiveSparkPlan Output [2]: [s_name#X, numwait#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/22.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/22.txt index 0d779c9160cf..2b93055014bd 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/22.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/22.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (50) +AdaptiveSparkPlan (52) +- == Final Plan == VeloxColumnarToRowExec (37) +- ^ SortExecTransformer (35) @@ -30,18 +30,20 @@ AdaptiveSparkPlan (50) +- ^ ProjectExecTransformer (11) +- ^ Scan parquet (10) +- == Initial Plan == - Sort (49) - +- Exchange (48) - +- HashAggregate (47) - +- Exchange (46) - +- HashAggregate (45) - +- Project (44) - +- ShuffledHashJoin LeftAnti BuildRight (43) - :- Exchange (40) - : +- Filter (39) - : +- Scan parquet (38) - +- Exchange (42) - +- Scan parquet (41) + Sort (51) + +- Exchange (50) + +- HashAggregate (49) + +- Exchange (48) + +- HashAggregate (47) + +- Project (46) + +- SortMergeJoin LeftAnti (45) + :- Sort (41) + : +- Exchange (40) + : +- Filter (39) + : +- Scan parquet (38) + +- Sort (44) + +- Exchange (43) + +- Scan parquet (42) (1) Scan parquet @@ -211,171 +213,179 @@ Condition : ((isnotnull(c_acctbal#X) AND substring(c_phone#X, 1, 2) IN (13,31,23 Input [3]: [c_custkey#X, c_phone#X, c_acctbal#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(41) Scan parquet +(41) Sort +Input [3]: [c_custkey#X, c_phone#X, c_acctbal#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(42) Scan parquet Output [1]: [o_custkey#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(42) Exchange +(43) Exchange Input [1]: [o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(43) ShuffledHashJoin +(44) Sort +Input [1]: [o_custkey#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(45) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join type: LeftAnti Join condition: None -(44) Project +(46) Project Output [2]: [substring(c_phone#X, 1, 2) AS cntrycode#X, c_acctbal#X] Input [3]: [c_custkey#X, c_phone#X, c_acctbal#X] -(45) HashAggregate +(47) HashAggregate Input [2]: [cntrycode#X, c_acctbal#X] Keys [1]: [cntrycode#X] Functions [2]: [partial_count(1), partial_sum(c_acctbal#X)] Aggregate Attributes [3]: [count#X, sum#X, isEmpty#X] Results [4]: [cntrycode#X, count#X, sum#X, isEmpty#X] -(46) Exchange +(48) Exchange Input [4]: [cntrycode#X, count#X, sum#X, isEmpty#X] Arguments: hashpartitioning(cntrycode#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(47) HashAggregate +(49) HashAggregate Input [4]: [cntrycode#X, count#X, sum#X, isEmpty#X] Keys [1]: [cntrycode#X] Functions [2]: [count(1), sum(c_acctbal#X)] Aggregate Attributes [2]: [count(1)#X, sum(c_acctbal#X)#X] Results [3]: [cntrycode#X, count(1)#X AS numcust#X, sum(c_acctbal#X)#X AS totacctbal#X] -(48) Exchange +(50) Exchange Input [3]: [cntrycode#X, numcust#X, totacctbal#X] Arguments: rangepartitioning(cntrycode#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(49) Sort +(51) Sort Input [3]: [cntrycode#X, numcust#X, totacctbal#X] Arguments: [cntrycode#X ASC NULLS FIRST], true, 0 -(50) AdaptiveSparkPlan +(52) AdaptiveSparkPlan Output [3]: [cntrycode#X, numcust#X, totacctbal#X] Arguments: isFinalPlan=true ===== Subqueries ===== Subquery:1 Hosting operator id = 1 Hosting Expression = Subquery subquery#X, [id=#X] -AdaptiveSparkPlan (70) +AdaptiveSparkPlan (72) +- == Final Plan == - VeloxColumnarToRowExec (63) - +- ^ RegularHashAggregateExecTransformer (61) - +- ^ InputIteratorTransformer (60) - +- ShuffleQueryStage (58), Statistics(X) - +- ColumnarExchange (57) - +- VeloxAppendBatches (56) - +- ^ FlushableHashAggregateExecTransformer (54) - +- ^ ProjectExecTransformer (53) - +- ^ NoopFilter (52) - +- ^ Scan parquet (51) + VeloxColumnarToRowExec (65) + +- ^ RegularHashAggregateExecTransformer (63) + +- ^ InputIteratorTransformer (62) + +- ShuffleQueryStage (60), Statistics(X) + +- ColumnarExchange (59) + +- VeloxAppendBatches (58) + +- ^ FlushableHashAggregateExecTransformer (56) + +- ^ ProjectExecTransformer (55) + +- ^ NoopFilter (54) + +- ^ Scan parquet (53) +- == Initial Plan == - HashAggregate (69) - +- Exchange (68) - +- HashAggregate (67) - +- Project (66) - +- Filter (65) - +- Scan parquet (64) + HashAggregate (71) + +- Exchange (70) + +- HashAggregate (69) + +- Project (68) + +- Filter (67) + +- Scan parquet (66) -(51) Scan parquet +(53) Scan parquet Output [2]: [c_phone#X, c_acctbal#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(c_acctbal), GreaterThan(c_acctbal,0.00)] ReadSchema: struct -(52) NoopFilter +(54) NoopFilter Input [2]: [c_phone#X, c_acctbal#X] Arguments: [c_phone#X, c_acctbal#X] -(53) ProjectExecTransformer +(55) ProjectExecTransformer Output [1]: [c_acctbal#X] Input [2]: [c_phone#X, c_acctbal#X] -(54) FlushableHashAggregateExecTransformer +(56) FlushableHashAggregateExecTransformer Input [1]: [c_acctbal#X] Keys: [] Functions [1]: [partial_avg(c_acctbal#X)] Aggregate Attributes [2]: [sum#X, count#X] Results [2]: [sum#X, count#X] -(55) WholeStageCodegenTransformer (X) +(57) WholeStageCodegenTransformer (X) Input [2]: [sum#X, count#X] Arguments: false -(56) VeloxAppendBatches +(58) VeloxAppendBatches Input [2]: [sum#X, count#X] Arguments: X -(57) ColumnarExchange +(59) ColumnarExchange Input [2]: [sum#X, count#X] Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=X], [id=#X] -(58) ShuffleQueryStage +(60) ShuffleQueryStage Output [2]: [sum#X, count#X] Arguments: X -(59) InputAdapter +(61) InputAdapter Input [2]: [sum#X, count#X] -(60) InputIteratorTransformer +(62) InputIteratorTransformer Input [2]: [sum#X, count#X] -(61) RegularHashAggregateExecTransformer +(63) RegularHashAggregateExecTransformer Input [2]: [sum#X, count#X] Keys: [] Functions [1]: [avg(c_acctbal#X)] Aggregate Attributes [1]: [avg(c_acctbal#X)#X] Results [1]: [avg(c_acctbal#X)#X AS avg(c_acctbal)#X] -(62) WholeStageCodegenTransformer (X) +(64) WholeStageCodegenTransformer (X) Input [1]: [avg(c_acctbal)#X] Arguments: false -(63) VeloxColumnarToRowExec +(65) VeloxColumnarToRowExec Input [1]: [avg(c_acctbal)#X] -(64) Scan parquet +(66) Scan parquet Output [2]: [c_phone#X, c_acctbal#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(c_acctbal), GreaterThan(c_acctbal,0.00)] ReadSchema: struct -(65) Filter +(67) Filter Input [2]: [c_phone#X, c_acctbal#X] Condition : ((isnotnull(c_acctbal#X) AND (c_acctbal#X > 0.00)) AND substring(c_phone#X, 1, 2) IN (13,31,23,29,30,18,17)) -(66) Project +(68) Project Output [1]: [c_acctbal#X] Input [2]: [c_phone#X, c_acctbal#X] -(67) HashAggregate +(69) HashAggregate Input [1]: [c_acctbal#X] Keys: [] Functions [1]: [partial_avg(c_acctbal#X)] Aggregate Attributes [2]: [sum#X, count#X] Results [2]: [sum#X, count#X] -(68) Exchange +(70) Exchange Input [2]: [sum#X, count#X] Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=X] -(69) HashAggregate +(71) HashAggregate Input [2]: [sum#X, count#X] Keys: [] Functions [1]: [avg(c_acctbal#X)] Aggregate Attributes [1]: [avg(c_acctbal#X)#X] Results [1]: [avg(c_acctbal#X)#X AS avg(c_acctbal)#X] -(70) AdaptiveSparkPlan +(72) AdaptiveSparkPlan Output [1]: [avg(c_acctbal)#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/3.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/3.txt index 8c671a61c9f7..aa679861da7c 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/3.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/3.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (63) +AdaptiveSparkPlan (67) +- == Final Plan == VeloxColumnarToRowExec (43) +- TakeOrderedAndProjectExecTransformer (42) @@ -36,25 +36,29 @@ AdaptiveSparkPlan (63) +- ^ NoopFilter (28) +- ^ Scan parquet (27) +- == Initial Plan == - TakeOrderedAndProject (62) - +- HashAggregate (61) - +- HashAggregate (60) - +- Project (59) - +- ShuffledHashJoin Inner BuildRight (58) - :- Exchange (53) - : +- Project (52) - : +- ShuffledHashJoin Inner BuildLeft (51) - : :- Exchange (47) - : : +- Project (46) - : : +- Filter (45) - : : +- Scan parquet (44) - : +- Exchange (50) - : +- Filter (49) - : +- Scan parquet (48) - +- Exchange (57) - +- Project (56) - +- Filter (55) - +- Scan parquet (54) + TakeOrderedAndProject (66) + +- HashAggregate (65) + +- HashAggregate (64) + +- Project (63) + +- SortMergeJoin Inner (62) + :- Sort (56) + : +- Exchange (55) + : +- Project (54) + : +- SortMergeJoin Inner (53) + : :- Sort (48) + : : +- Exchange (47) + : : +- Project (46) + : : +- Filter (45) + : : +- Scan parquet (44) + : +- Sort (52) + : +- Exchange (51) + : +- Filter (50) + : +- Scan parquet (49) + +- Sort (61) + +- Exchange (60) + +- Project (59) + +- Filter (58) + +- Scan parquet (57) (1) Scan parquet @@ -258,82 +262,98 @@ Input [2]: [c_custkey#X, c_mktsegment#X] Input [1]: [c_custkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(48) Scan parquet +(48) Sort +Input [1]: [c_custkey#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(49) Scan parquet Output [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderdate), LessThan(o_orderdate,1995-03-15), IsNotNull(o_custkey), IsNotNull(o_orderkey)] ReadSchema: struct -(49) Filter +(50) Filter Input [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Condition : (((isnotnull(o_orderdate#X) AND (o_orderdate#X < 1995-03-15)) AND isnotnull(o_custkey#X)) AND isnotnull(o_orderkey#X)) -(50) Exchange +(51) Exchange Input [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(51) ShuffledHashJoin +(52) Sort +Input [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(53) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join type: Inner Join condition: None -(52) Project +(54) Project Output [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] Input [5]: [c_custkey#X, o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] -(53) Exchange +(55) Exchange Input [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(54) Scan parquet +(56) Sort +Input [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(57) Scan parquet Output [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThan(l_shipdate,1995-03-15), IsNotNull(l_orderkey)] ReadSchema: struct -(55) Filter +(58) Filter Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : ((isnotnull(l_shipdate#X) AND (l_shipdate#X > 1995-03-15)) AND isnotnull(l_orderkey#X)) -(56) Project +(59) Project Output [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(57) Exchange +(60) Exchange Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(58) ShuffledHashJoin +(61) Sort +Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(62) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join type: Inner Join condition: None -(59) Project +(63) Project Output [5]: [o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] Input [6]: [o_orderkey#X, o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] -(60) HashAggregate +(64) HashAggregate Input [5]: [o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] Keys [3]: [l_orderkey#X, o_orderdate#X, o_shippriority#X] Functions [1]: [partial_sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [5]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] -(61) HashAggregate +(65) HashAggregate Input [5]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] Keys [3]: [l_orderkey#X, o_orderdate#X, o_shippriority#X] Functions [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X] Results [4]: [l_orderkey#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X AS revenue#X, o_orderdate#X, o_shippriority#X] -(62) TakeOrderedAndProject +(66) TakeOrderedAndProject Input [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] Arguments: X, [revenue#X DESC NULLS LAST, o_orderdate#X ASC NULLS FIRST], [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] -(63) AdaptiveSparkPlan +(67) AdaptiveSparkPlan Output [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/4.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/4.txt index 3d145f0c3bb8..02c494288f95 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/4.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/4.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (54) +AdaptiveSparkPlan (56) +- == Final Plan == VeloxColumnarToRowExec (38) +- ^ SortExecTransformer (36) @@ -31,21 +31,23 @@ AdaptiveSparkPlan (54) +- ^ NoopFilter (11) +- ^ Scan parquet (10) +- == Initial Plan == - Sort (53) - +- Exchange (52) - +- HashAggregate (51) - +- Exchange (50) - +- HashAggregate (49) - +- Project (48) - +- ShuffledHashJoin LeftSemi BuildRight (47) - :- Exchange (42) - : +- Project (41) - : +- Filter (40) - : +- Scan parquet (39) - +- Exchange (46) - +- Project (45) - +- Filter (44) - +- Scan parquet (43) + Sort (55) + +- Exchange (54) + +- HashAggregate (53) + +- Exchange (52) + +- HashAggregate (51) + +- Project (50) + +- SortMergeJoin LeftSemi (49) + :- Sort (43) + : +- Exchange (42) + : +- Project (41) + : +- Filter (40) + : +- Scan parquet (39) + +- Sort (48) + +- Exchange (47) + +- Project (46) + +- Filter (45) + +- Scan parquet (44) (1) Scan parquet @@ -224,61 +226,69 @@ Input [3]: [o_orderkey#X, o_orderdate#X, o_orderpriority#X] Input [2]: [o_orderkey#X, o_orderpriority#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(43) Scan parquet +(43) Sort +Input [2]: [o_orderkey#X, o_orderpriority#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(44) Scan parquet Output [3]: [l_orderkey#X, l_commitdate#X, l_receiptdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_commitdate), IsNotNull(l_receiptdate)] ReadSchema: struct -(44) Filter +(45) Filter Input [3]: [l_orderkey#X, l_commitdate#X, l_receiptdate#X] Condition : ((isnotnull(l_commitdate#X) AND isnotnull(l_receiptdate#X)) AND (l_commitdate#X < l_receiptdate#X)) -(45) Project +(46) Project Output [1]: [l_orderkey#X] Input [3]: [l_orderkey#X, l_commitdate#X, l_receiptdate#X] -(46) Exchange +(47) Exchange Input [1]: [l_orderkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(47) ShuffledHashJoin +(48) Sort +Input [1]: [l_orderkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(49) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join type: LeftSemi Join condition: None -(48) Project +(50) Project Output [1]: [o_orderpriority#X] Input [2]: [o_orderkey#X, o_orderpriority#X] -(49) HashAggregate +(51) HashAggregate Input [1]: [o_orderpriority#X] Keys [1]: [o_orderpriority#X] Functions [1]: [partial_count(1)] Aggregate Attributes [1]: [count#X] Results [2]: [o_orderpriority#X, count#X] -(50) Exchange +(52) Exchange Input [2]: [o_orderpriority#X, count#X] Arguments: hashpartitioning(o_orderpriority#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(51) HashAggregate +(53) HashAggregate Input [2]: [o_orderpriority#X, count#X] Keys [1]: [o_orderpriority#X] Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#X] Results [2]: [o_orderpriority#X, count(1)#X AS order_count#X] -(52) Exchange +(54) Exchange Input [2]: [o_orderpriority#X, order_count#X] Arguments: rangepartitioning(o_orderpriority#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(53) Sort +(55) Sort Input [2]: [o_orderpriority#X, order_count#X] Arguments: [o_orderpriority#X ASC NULLS FIRST], true, 0 -(54) AdaptiveSparkPlan +(56) AdaptiveSparkPlan Output [2]: [o_orderpriority#X, order_count#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/5.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/5.txt index 08e655f5aa81..67150984ab61 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/5.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/5.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (146) +AdaptiveSparkPlan (156) +- == Final Plan == VeloxColumnarToRowExec (106) +- ^ SortExecTransformer (104) @@ -83,45 +83,55 @@ AdaptiveSparkPlan (146) +- ^ NoopFilter (79) +- ^ Scan parquet (78) +- == Initial Plan == - Sort (145) - +- Exchange (144) - +- HashAggregate (143) - +- Exchange (142) - +- HashAggregate (141) - +- Project (140) - +- ShuffledHashJoin Inner BuildRight (139) - :- Exchange (134) - : +- Project (133) - : +- ShuffledHashJoin Inner BuildRight (132) - : :- Exchange (128) - : : +- Project (127) - : : +- ShuffledHashJoin Inner BuildRight (126) - : : :- Exchange (122) - : : : +- Project (121) - : : : +- ShuffledHashJoin Inner BuildRight (120) - : : : :- Exchange (116) - : : : : +- Project (115) - : : : : +- ShuffledHashJoin Inner BuildLeft (114) - : : : : :- Exchange (109) - : : : : : +- Filter (108) - : : : : : +- Scan parquet (107) - : : : : +- Exchange (113) - : : : : +- Project (112) - : : : : +- Filter (111) - : : : : +- Scan parquet (110) - : : : +- Exchange (119) - : : : +- Filter (118) - : : : +- Scan parquet (117) - : : +- Exchange (125) - : : +- Filter (124) - : : +- Scan parquet (123) - : +- Exchange (131) - : +- Filter (130) - : +- Scan parquet (129) - +- Exchange (138) - +- Project (137) - +- Filter (136) - +- Scan parquet (135) + Sort (155) + +- Exchange (154) + +- HashAggregate (153) + +- Exchange (152) + +- HashAggregate (151) + +- Project (150) + +- SortMergeJoin Inner (149) + :- Sort (143) + : +- Exchange (142) + : +- Project (141) + : +- SortMergeJoin Inner (140) + : :- Sort (135) + : : +- Exchange (134) + : : +- Project (133) + : : +- SortMergeJoin Inner (132) + : : :- Sort (127) + : : : +- Exchange (126) + : : : +- Project (125) + : : : +- SortMergeJoin Inner (124) + : : : :- Sort (119) + : : : : +- Exchange (118) + : : : : +- Project (117) + : : : : +- SortMergeJoin Inner (116) + : : : : :- Sort (110) + : : : : : +- Exchange (109) + : : : : : +- Filter (108) + : : : : : +- Scan parquet (107) + : : : : +- Sort (115) + : : : : +- Exchange (114) + : : : : +- Project (113) + : : : : +- Filter (112) + : : : : +- Scan parquet (111) + : : : +- Sort (123) + : : : +- Exchange (122) + : : : +- Filter (121) + : : : +- Scan parquet (120) + : : +- Sort (131) + : : +- Exchange (130) + : : +- Filter (129) + : : +- Scan parquet (128) + : +- Sort (139) + : +- Exchange (138) + : +- Filter (137) + : +- Scan parquet (136) + +- Sort (148) + +- Exchange (147) + +- Project (146) + +- Filter (145) + +- Scan parquet (144) (1) Scan parquet @@ -572,181 +582,221 @@ Condition : (isnotnull(c_custkey#X) AND isnotnull(c_nationkey#X)) Input [2]: [c_custkey#X, c_nationkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(110) Scan parquet +(110) Sort +Input [2]: [c_custkey#X, c_nationkey#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(111) Scan parquet Output [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderdate), GreaterThanOrEqual(o_orderdate,1994-01-01), LessThan(o_orderdate,1995-01-01), IsNotNull(o_custkey), IsNotNull(o_orderkey)] ReadSchema: struct -(111) Filter +(112) Filter Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Condition : ((((isnotnull(o_orderdate#X) AND (o_orderdate#X >= 1994-01-01)) AND (o_orderdate#X < 1995-01-01)) AND isnotnull(o_custkey#X)) AND isnotnull(o_orderkey#X)) -(112) Project +(113) Project Output [2]: [o_orderkey#X, o_custkey#X] Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] -(113) Exchange +(114) Exchange Input [2]: [o_orderkey#X, o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(114) ShuffledHashJoin +(115) Sort +Input [2]: [o_orderkey#X, o_custkey#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(116) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join type: Inner Join condition: None -(115) Project +(117) Project Output [2]: [c_nationkey#X, o_orderkey#X] Input [4]: [c_custkey#X, c_nationkey#X, o_orderkey#X, o_custkey#X] -(116) Exchange +(118) Exchange Input [2]: [c_nationkey#X, o_orderkey#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(117) Scan parquet +(119) Sort +Input [2]: [c_nationkey#X, o_orderkey#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(120) Scan parquet Output [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_orderkey), IsNotNull(l_suppkey)] ReadSchema: struct -(118) Filter +(121) Filter Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Condition : (isnotnull(l_orderkey#X) AND isnotnull(l_suppkey#X)) -(119) Exchange +(122) Exchange Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(120) ShuffledHashJoin +(123) Sort +Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(124) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join type: Inner Join condition: None -(121) Project +(125) Project Output [4]: [c_nationkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Input [6]: [c_nationkey#X, o_orderkey#X, l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] -(122) Exchange +(126) Exchange Input [4]: [c_nationkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_suppkey#X, c_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(123) Scan parquet +(127) Sort +Input [4]: [c_nationkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_suppkey#X ASC NULLS FIRST, c_nationkey#X ASC NULLS FIRST], false, 0 + +(128) Scan parquet Output [2]: [s_suppkey#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct -(124) Filter +(129) Filter Input [2]: [s_suppkey#X, s_nationkey#X] Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) -(125) Exchange +(130) Exchange Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(126) ShuffledHashJoin +(131) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST, s_nationkey#X ASC NULLS FIRST], false, 0 + +(132) SortMergeJoin Left keys [2]: [l_suppkey#X, c_nationkey#X] Right keys [2]: [s_suppkey#X, s_nationkey#X] Join type: Inner Join condition: None -(127) Project +(133) Project Output [3]: [l_extendedprice#X, l_discount#X, s_nationkey#X] Input [6]: [c_nationkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, s_suppkey#X, s_nationkey#X] -(128) Exchange +(134) Exchange Input [3]: [l_extendedprice#X, l_discount#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(129) Scan parquet +(135) Sort +Input [3]: [l_extendedprice#X, l_discount#X, s_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(136) Scan parquet Output [3]: [n_nationkey#X, n_name#X, n_regionkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey), IsNotNull(n_regionkey)] ReadSchema: struct -(130) Filter +(137) Filter Input [3]: [n_nationkey#X, n_name#X, n_regionkey#X] Condition : (isnotnull(n_nationkey#X) AND isnotnull(n_regionkey#X)) -(131) Exchange +(138) Exchange Input [3]: [n_nationkey#X, n_name#X, n_regionkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(132) ShuffledHashJoin +(139) Sort +Input [3]: [n_nationkey#X, n_name#X, n_regionkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(140) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join type: Inner Join condition: None -(133) Project +(141) Project Output [4]: [l_extendedprice#X, l_discount#X, n_name#X, n_regionkey#X] Input [6]: [l_extendedprice#X, l_discount#X, s_nationkey#X, n_nationkey#X, n_name#X, n_regionkey#X] -(134) Exchange +(142) Exchange Input [4]: [l_extendedprice#X, l_discount#X, n_name#X, n_regionkey#X] Arguments: hashpartitioning(n_regionkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(135) Scan parquet +(143) Sort +Input [4]: [l_extendedprice#X, l_discount#X, n_name#X, n_regionkey#X] +Arguments: [n_regionkey#X ASC NULLS FIRST], false, 0 + +(144) Scan parquet Output [2]: [r_regionkey#X, r_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(r_name), EqualTo(r_name,ASIA), IsNotNull(r_regionkey)] ReadSchema: struct -(136) Filter +(145) Filter Input [2]: [r_regionkey#X, r_name#X] Condition : ((isnotnull(r_name#X) AND (r_name#X = ASIA)) AND isnotnull(r_regionkey#X)) -(137) Project +(146) Project Output [1]: [r_regionkey#X] Input [2]: [r_regionkey#X, r_name#X] -(138) Exchange +(147) Exchange Input [1]: [r_regionkey#X] Arguments: hashpartitioning(r_regionkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(139) ShuffledHashJoin +(148) Sort +Input [1]: [r_regionkey#X] +Arguments: [r_regionkey#X ASC NULLS FIRST], false, 0 + +(149) SortMergeJoin Left keys [1]: [n_regionkey#X] Right keys [1]: [r_regionkey#X] Join type: Inner Join condition: None -(140) Project +(150) Project Output [3]: [l_extendedprice#X, l_discount#X, n_name#X] Input [5]: [l_extendedprice#X, l_discount#X, n_name#X, n_regionkey#X, r_regionkey#X] -(141) HashAggregate +(151) HashAggregate Input [3]: [l_extendedprice#X, l_discount#X, n_name#X] Keys [1]: [n_name#X] Functions [1]: [partial_sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [n_name#X, sum#X, isEmpty#X] -(142) Exchange +(152) Exchange Input [3]: [n_name#X, sum#X, isEmpty#X] Arguments: hashpartitioning(n_name#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(143) HashAggregate +(153) HashAggregate Input [3]: [n_name#X, sum#X, isEmpty#X] Keys [1]: [n_name#X] Functions [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X] Results [2]: [n_name#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X AS revenue#X] -(144) Exchange +(154) Exchange Input [2]: [n_name#X, revenue#X] Arguments: rangepartitioning(revenue#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(145) Sort +(155) Sort Input [2]: [n_name#X, revenue#X] Arguments: [revenue#X DESC NULLS LAST], true, 0 -(146) AdaptiveSparkPlan +(156) AdaptiveSparkPlan Output [2]: [n_name#X, revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/7.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/7.txt index 71ade94be21d..65dfab993c3c 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/7.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/7.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (139) +AdaptiveSparkPlan (149) +- == Final Plan == VeloxColumnarToRowExec (101) +- ^ SortExecTransformer (99) @@ -79,43 +79,53 @@ AdaptiveSparkPlan (139) +- ShuffleQueryStage (79), Statistics(X) +- ReusedExchange (78) +- == Initial Plan == - Sort (138) - +- Exchange (137) - +- HashAggregate (136) - +- Exchange (135) - +- HashAggregate (134) - +- Project (133) - +- ShuffledHashJoin Inner BuildRight (132) - :- Exchange (128) - : +- Project (127) - : +- ShuffledHashJoin Inner BuildRight (126) - : :- Exchange (122) - : : +- Project (121) - : : +- ShuffledHashJoin Inner BuildRight (120) - : : :- Exchange (116) - : : : +- Project (115) - : : : +- ShuffledHashJoin Inner BuildRight (114) - : : : :- Exchange (110) - : : : : +- Project (109) - : : : : +- ShuffledHashJoin Inner BuildLeft (108) - : : : : :- Exchange (104) - : : : : : +- Filter (103) - : : : : : +- Scan parquet (102) - : : : : +- Exchange (107) - : : : : +- Filter (106) - : : : : +- Scan parquet (105) - : : : +- Exchange (113) - : : : +- Filter (112) - : : : +- Scan parquet (111) - : : +- Exchange (119) - : : +- Filter (118) - : : +- Scan parquet (117) - : +- Exchange (125) - : +- Filter (124) - : +- Scan parquet (123) - +- Exchange (131) - +- Filter (130) - +- Scan parquet (129) + Sort (148) + +- Exchange (147) + +- HashAggregate (146) + +- Exchange (145) + +- HashAggregate (144) + +- Project (143) + +- SortMergeJoin Inner (142) + :- Sort (137) + : +- Exchange (136) + : +- Project (135) + : +- SortMergeJoin Inner (134) + : :- Sort (129) + : : +- Exchange (128) + : : +- Project (127) + : : +- SortMergeJoin Inner (126) + : : :- Sort (121) + : : : +- Exchange (120) + : : : +- Project (119) + : : : +- SortMergeJoin Inner (118) + : : : :- Sort (113) + : : : : +- Exchange (112) + : : : : +- Project (111) + : : : : +- SortMergeJoin Inner (110) + : : : : :- Sort (105) + : : : : : +- Exchange (104) + : : : : : +- Filter (103) + : : : : : +- Scan parquet (102) + : : : : +- Sort (109) + : : : : +- Exchange (108) + : : : : +- Filter (107) + : : : : +- Scan parquet (106) + : : : +- Sort (117) + : : : +- Exchange (116) + : : : +- Filter (115) + : : : +- Scan parquet (114) + : : +- Sort (125) + : : +- Exchange (124) + : : +- Filter (123) + : : +- Scan parquet (122) + : +- Sort (133) + : +- Exchange (132) + : +- Filter (131) + : +- Scan parquet (130) + +- Sort (141) + +- Exchange (140) + +- Filter (139) + +- Scan parquet (138) (1) Scan parquet @@ -542,173 +552,213 @@ Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(105) Scan parquet +(105) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(106) Scan parquet Output [5]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1995-01-01), LessThanOrEqual(l_shipdate,1996-12-31), IsNotNull(l_suppkey), IsNotNull(l_orderkey)] ReadSchema: struct -(106) Filter +(107) Filter Input [5]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : ((((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1995-01-01)) AND (l_shipdate#X <= 1996-12-31)) AND isnotnull(l_suppkey#X)) AND isnotnull(l_orderkey#X)) -(107) Exchange +(108) Exchange Input [5]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(108) ShuffledHashJoin +(109) Sort +Input [5]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Arguments: [l_suppkey#X ASC NULLS FIRST], false, 0 + +(110) SortMergeJoin Left keys [1]: [s_suppkey#X] Right keys [1]: [l_suppkey#X] Join type: Inner Join condition: None -(109) Project +(111) Project Output [5]: [s_nationkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Input [7]: [s_suppkey#X, s_nationkey#X, l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(110) Exchange +(112) Exchange Input [5]: [s_nationkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(111) Scan parquet +(113) Sort +Input [5]: [s_nationkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(114) Scan parquet Output [2]: [o_orderkey#X, o_custkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderkey), IsNotNull(o_custkey)] ReadSchema: struct -(112) Filter +(115) Filter Input [2]: [o_orderkey#X, o_custkey#X] Condition : (isnotnull(o_orderkey#X) AND isnotnull(o_custkey#X)) -(113) Exchange +(116) Exchange Input [2]: [o_orderkey#X, o_custkey#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(114) ShuffledHashJoin +(117) Sort +Input [2]: [o_orderkey#X, o_custkey#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(118) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [o_orderkey#X] Join type: Inner Join condition: None -(115) Project +(119) Project Output [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_custkey#X] Input [7]: [s_nationkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_orderkey#X, o_custkey#X] -(116) Exchange +(120) Exchange Input [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(117) Scan parquet +(121) Sort +Input [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_custkey#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(122) Scan parquet Output [2]: [c_custkey#X, c_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(c_custkey), IsNotNull(c_nationkey)] ReadSchema: struct -(118) Filter +(123) Filter Input [2]: [c_custkey#X, c_nationkey#X] Condition : (isnotnull(c_custkey#X) AND isnotnull(c_nationkey#X)) -(119) Exchange +(124) Exchange Input [2]: [c_custkey#X, c_nationkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(120) ShuffledHashJoin +(125) Sort +Input [2]: [c_custkey#X, c_nationkey#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(126) SortMergeJoin Left keys [1]: [o_custkey#X] Right keys [1]: [c_custkey#X] Join type: Inner Join condition: None -(121) Project +(127) Project Output [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X] Input [7]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_custkey#X, c_custkey#X, c_nationkey#X] -(122) Exchange +(128) Exchange Input [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(123) Scan parquet +(129) Sort +Input [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(130) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey), Or(EqualTo(n_name,FRANCE),EqualTo(n_name,GERMANY))] ReadSchema: struct -(124) Filter +(131) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : (isnotnull(n_nationkey#X) AND ((n_name#X = FRANCE) OR (n_name#X = GERMANY))) -(125) Exchange +(132) Exchange Input [2]: [n_nationkey#X, n_name#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(126) ShuffledHashJoin +(133) Sort +Input [2]: [n_nationkey#X, n_name#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(134) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join type: Inner Join condition: None -(127) Project +(135) Project Output [5]: [l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_name#X] Input [7]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_nationkey#X, n_name#X] -(128) Exchange +(136) Exchange Input [5]: [l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_name#X] Arguments: hashpartitioning(c_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(129) Scan parquet +(137) Sort +Input [5]: [l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_name#X] +Arguments: [c_nationkey#X ASC NULLS FIRST], false, 0 + +(138) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey), Or(EqualTo(n_name,GERMANY),EqualTo(n_name,FRANCE))] ReadSchema: struct -(130) Filter +(139) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : (isnotnull(n_nationkey#X) AND ((n_name#X = GERMANY) OR (n_name#X = FRANCE))) -(131) Exchange +(140) Exchange Input [2]: [n_nationkey#X, n_name#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(132) ShuffledHashJoin +(141) Sort +Input [2]: [n_nationkey#X, n_name#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(142) SortMergeJoin Left keys [1]: [c_nationkey#X] Right keys [1]: [n_nationkey#X] Join type: Inner Join condition: (((n_name#X = FRANCE) AND (n_name#X = GERMANY)) OR ((n_name#X = GERMANY) AND (n_name#X = FRANCE))) -(133) Project +(143) Project Output [4]: [n_name#X AS supp_nation#X, n_name#X AS cust_nation#X, year(l_shipdate#X) AS l_year#X, (l_extendedprice#X * (1 - l_discount#X)) AS volume#X] Input [7]: [l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_name#X, n_nationkey#X, n_name#X] -(134) HashAggregate +(144) HashAggregate Input [4]: [supp_nation#X, cust_nation#X, l_year#X, volume#X] Keys [3]: [supp_nation#X, cust_nation#X, l_year#X] Functions [1]: [partial_sum(volume#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [5]: [supp_nation#X, cust_nation#X, l_year#X, sum#X, isEmpty#X] -(135) Exchange +(145) Exchange Input [5]: [supp_nation#X, cust_nation#X, l_year#X, sum#X, isEmpty#X] Arguments: hashpartitioning(supp_nation#X, cust_nation#X, l_year#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(136) HashAggregate +(146) HashAggregate Input [5]: [supp_nation#X, cust_nation#X, l_year#X, sum#X, isEmpty#X] Keys [3]: [supp_nation#X, cust_nation#X, l_year#X] Functions [1]: [sum(volume#X)] Aggregate Attributes [1]: [sum(volume#X)#X] Results [4]: [supp_nation#X, cust_nation#X, l_year#X, sum(volume#X)#X AS revenue#X] -(137) Exchange +(147) Exchange Input [4]: [supp_nation#X, cust_nation#X, l_year#X, revenue#X] Arguments: rangepartitioning(supp_nation#X ASC NULLS FIRST, cust_nation#X ASC NULLS FIRST, l_year#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(138) Sort +(148) Sort Input [4]: [supp_nation#X, cust_nation#X, l_year#X, revenue#X] Arguments: [supp_nation#X ASC NULLS FIRST, cust_nation#X ASC NULLS FIRST, l_year#X ASC NULLS FIRST], true, 0 -(139) AdaptiveSparkPlan +(149) AdaptiveSparkPlan Output [4]: [supp_nation#X, cust_nation#X, l_year#X, revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/8.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/8.txt index ddeab25c4569..61f6287c2429 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/8.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/8.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (193) +AdaptiveSparkPlan (207) +- == Final Plan == VeloxColumnarToRowExec (141) +- ^ SortExecTransformer (139) @@ -110,57 +110,71 @@ AdaptiveSparkPlan (193) +- ^ NoopFilter (113) +- ^ Scan parquet (112) +- == Initial Plan == - Sort (192) - +- Exchange (191) - +- HashAggregate (190) - +- Exchange (189) - +- HashAggregate (188) - +- Project (187) - +- ShuffledHashJoin Inner BuildRight (186) - :- Exchange (181) - : +- Project (180) - : +- ShuffledHashJoin Inner BuildRight (179) - : :- Exchange (175) - : : +- Project (174) - : : +- ShuffledHashJoin Inner BuildRight (173) - : : :- Exchange (169) - : : : +- Project (168) - : : : +- ShuffledHashJoin Inner BuildRight (167) - : : : :- Exchange (163) - : : : : +- Project (162) - : : : : +- ShuffledHashJoin Inner BuildRight (161) - : : : : :- Exchange (157) - : : : : : +- Project (156) - : : : : : +- ShuffledHashJoin Inner BuildRight (155) - : : : : : :- Exchange (151) - : : : : : : +- Project (150) - : : : : : : +- ShuffledHashJoin Inner BuildLeft (149) - : : : : : : :- Exchange (145) - : : : : : : : +- Project (144) - : : : : : : : +- Filter (143) - : : : : : : : +- Scan parquet (142) - : : : : : : +- Exchange (148) - : : : : : : +- Filter (147) - : : : : : : +- Scan parquet (146) - : : : : : +- Exchange (154) - : : : : : +- Filter (153) - : : : : : +- Scan parquet (152) - : : : : +- Exchange (160) - : : : : +- Filter (159) - : : : : +- Scan parquet (158) - : : : +- Exchange (166) - : : : +- Filter (165) - : : : +- Scan parquet (164) - : : +- Exchange (172) - : : +- Filter (171) - : : +- Scan parquet (170) - : +- Exchange (178) - : +- Filter (177) - : +- Scan parquet (176) - +- Exchange (185) - +- Project (184) - +- Filter (183) - +- Scan parquet (182) + Sort (206) + +- Exchange (205) + +- HashAggregate (204) + +- Exchange (203) + +- HashAggregate (202) + +- Project (201) + +- SortMergeJoin Inner (200) + :- Sort (194) + : +- Exchange (193) + : +- Project (192) + : +- SortMergeJoin Inner (191) + : :- Sort (186) + : : +- Exchange (185) + : : +- Project (184) + : : +- SortMergeJoin Inner (183) + : : :- Sort (178) + : : : +- Exchange (177) + : : : +- Project (176) + : : : +- SortMergeJoin Inner (175) + : : : :- Sort (170) + : : : : +- Exchange (169) + : : : : +- Project (168) + : : : : +- SortMergeJoin Inner (167) + : : : : :- Sort (162) + : : : : : +- Exchange (161) + : : : : : +- Project (160) + : : : : : +- SortMergeJoin Inner (159) + : : : : : :- Sort (154) + : : : : : : +- Exchange (153) + : : : : : : +- Project (152) + : : : : : : +- SortMergeJoin Inner (151) + : : : : : : :- Sort (146) + : : : : : : : +- Exchange (145) + : : : : : : : +- Project (144) + : : : : : : : +- Filter (143) + : : : : : : : +- Scan parquet (142) + : : : : : : +- Sort (150) + : : : : : : +- Exchange (149) + : : : : : : +- Filter (148) + : : : : : : +- Scan parquet (147) + : : : : : +- Sort (158) + : : : : : +- Exchange (157) + : : : : : +- Filter (156) + : : : : : +- Scan parquet (155) + : : : : +- Sort (166) + : : : : +- Exchange (165) + : : : : +- Filter (164) + : : : : +- Scan parquet (163) + : : : +- Sort (174) + : : : +- Exchange (173) + : : : +- Filter (172) + : : : +- Scan parquet (171) + : : +- Sort (182) + : : +- Exchange (181) + : : +- Filter (180) + : : +- Scan parquet (179) + : +- Sort (190) + : +- Exchange (189) + : +- Filter (188) + : +- Scan parquet (187) + +- Sort (199) + +- Exchange (198) + +- Project (197) + +- Filter (196) + +- Scan parquet (195) (1) Scan parquet @@ -757,235 +771,291 @@ Input [2]: [p_partkey#X, p_type#X] Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(146) Scan parquet +(146) Sort +Input [1]: [p_partkey#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(147) Scan parquet Output [5]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_partkey), IsNotNull(l_suppkey), IsNotNull(l_orderkey)] ReadSchema: struct -(147) Filter +(148) Filter Input [5]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Condition : ((isnotnull(l_partkey#X) AND isnotnull(l_suppkey#X)) AND isnotnull(l_orderkey#X)) -(148) Exchange +(149) Exchange Input [5]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(149) ShuffledHashJoin +(150) Sort +Input [5]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(151) SortMergeJoin Left keys [1]: [p_partkey#X] Right keys [1]: [l_partkey#X] Join type: Inner Join condition: None -(150) Project +(152) Project Output [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Input [6]: [p_partkey#X, l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] -(151) Exchange +(153) Exchange Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(152) Scan parquet +(154) Sort +Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_suppkey#X ASC NULLS FIRST], false, 0 + +(155) Scan parquet Output [2]: [s_suppkey#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct -(153) Filter +(156) Filter Input [2]: [s_suppkey#X, s_nationkey#X] Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) -(154) Exchange +(157) Exchange Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(155) ShuffledHashJoin +(158) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(159) SortMergeJoin Left keys [1]: [l_suppkey#X] Right keys [1]: [s_suppkey#X] Join type: Inner Join condition: None -(156) Project +(160) Project Output [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, s_nationkey#X] Input [6]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, s_suppkey#X, s_nationkey#X] -(157) Exchange +(161) Exchange Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, s_nationkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(158) Scan parquet +(162) Sort +Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, s_nationkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(163) Scan parquet Output [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderdate), GreaterThanOrEqual(o_orderdate,1995-01-01), LessThanOrEqual(o_orderdate,1996-12-31), IsNotNull(o_orderkey), IsNotNull(o_custkey)] ReadSchema: struct -(159) Filter +(164) Filter Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Condition : ((((isnotnull(o_orderdate#X) AND (o_orderdate#X >= 1995-01-01)) AND (o_orderdate#X <= 1996-12-31)) AND isnotnull(o_orderkey#X)) AND isnotnull(o_custkey#X)) -(160) Exchange +(165) Exchange Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(161) ShuffledHashJoin +(166) Sort +Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(167) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [o_orderkey#X] Join type: Inner Join condition: None -(162) Project +(168) Project Output [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_custkey#X, o_orderdate#X] Input [7]: [l_orderkey#X, l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderkey#X, o_custkey#X, o_orderdate#X] -(163) Exchange +(169) Exchange Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_custkey#X, o_orderdate#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(164) Scan parquet +(170) Sort +Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_custkey#X, o_orderdate#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(171) Scan parquet Output [2]: [c_custkey#X, c_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(c_custkey), IsNotNull(c_nationkey)] ReadSchema: struct -(165) Filter +(172) Filter Input [2]: [c_custkey#X, c_nationkey#X] Condition : (isnotnull(c_custkey#X) AND isnotnull(c_nationkey#X)) -(166) Exchange +(173) Exchange Input [2]: [c_custkey#X, c_nationkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(167) ShuffledHashJoin +(174) Sort +Input [2]: [c_custkey#X, c_nationkey#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(175) SortMergeJoin Left keys [1]: [o_custkey#X] Right keys [1]: [c_custkey#X] Join type: Inner Join condition: None -(168) Project +(176) Project Output [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, c_nationkey#X] Input [7]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_custkey#X, o_orderdate#X, c_custkey#X, c_nationkey#X] -(169) Exchange +(177) Exchange Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, c_nationkey#X] Arguments: hashpartitioning(c_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(170) Scan parquet +(178) Sort +Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, c_nationkey#X] +Arguments: [c_nationkey#X ASC NULLS FIRST], false, 0 + +(179) Scan parquet Output [2]: [n_nationkey#X, n_regionkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey), IsNotNull(n_regionkey)] ReadSchema: struct -(171) Filter +(180) Filter Input [2]: [n_nationkey#X, n_regionkey#X] Condition : (isnotnull(n_nationkey#X) AND isnotnull(n_regionkey#X)) -(172) Exchange +(181) Exchange Input [2]: [n_nationkey#X, n_regionkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(173) ShuffledHashJoin +(182) Sort +Input [2]: [n_nationkey#X, n_regionkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(183) SortMergeJoin Left keys [1]: [c_nationkey#X] Right keys [1]: [n_nationkey#X] Join type: Inner Join condition: None -(174) Project +(184) Project Output [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, n_regionkey#X] Input [7]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, c_nationkey#X, n_nationkey#X, n_regionkey#X] -(175) Exchange +(185) Exchange Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, n_regionkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(176) Scan parquet +(186) Sort +Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, n_regionkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(187) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey)] ReadSchema: struct -(177) Filter +(188) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : isnotnull(n_nationkey#X) -(178) Exchange +(189) Exchange Input [2]: [n_nationkey#X, n_name#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(179) ShuffledHashJoin +(190) Sort +Input [2]: [n_nationkey#X, n_name#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(191) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join type: Inner Join condition: None -(180) Project +(192) Project Output [5]: [l_extendedprice#X, l_discount#X, o_orderdate#X, n_regionkey#X, n_name#X] Input [7]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, n_regionkey#X, n_nationkey#X, n_name#X] -(181) Exchange +(193) Exchange Input [5]: [l_extendedprice#X, l_discount#X, o_orderdate#X, n_regionkey#X, n_name#X] Arguments: hashpartitioning(n_regionkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(182) Scan parquet +(194) Sort +Input [5]: [l_extendedprice#X, l_discount#X, o_orderdate#X, n_regionkey#X, n_name#X] +Arguments: [n_regionkey#X ASC NULLS FIRST], false, 0 + +(195) Scan parquet Output [2]: [r_regionkey#X, r_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(r_name), EqualTo(r_name,AMERICA), IsNotNull(r_regionkey)] ReadSchema: struct -(183) Filter +(196) Filter Input [2]: [r_regionkey#X, r_name#X] Condition : ((isnotnull(r_name#X) AND (r_name#X = AMERICA)) AND isnotnull(r_regionkey#X)) -(184) Project +(197) Project Output [1]: [r_regionkey#X] Input [2]: [r_regionkey#X, r_name#X] -(185) Exchange +(198) Exchange Input [1]: [r_regionkey#X] Arguments: hashpartitioning(r_regionkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(186) ShuffledHashJoin +(199) Sort +Input [1]: [r_regionkey#X] +Arguments: [r_regionkey#X ASC NULLS FIRST], false, 0 + +(200) SortMergeJoin Left keys [1]: [n_regionkey#X] Right keys [1]: [r_regionkey#X] Join type: Inner Join condition: None -(187) Project +(201) Project Output [3]: [year(o_orderdate#X) AS o_year#X, (l_extendedprice#X * (1 - l_discount#X)) AS volume#X, n_name#X AS nation#X] Input [6]: [l_extendedprice#X, l_discount#X, o_orderdate#X, n_regionkey#X, n_name#X, r_regionkey#X] -(188) HashAggregate +(202) HashAggregate Input [3]: [o_year#X, volume#X, nation#X] Keys [1]: [o_year#X] Functions [2]: [partial_sum(CASE WHEN (nation#X = BRAZIL) THEN volume#X ELSE 0.0000 END), partial_sum(volume#X)] Aggregate Attributes [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] Results [5]: [o_year#X, sum#X, isEmpty#X, sum#X, isEmpty#X] -(189) Exchange +(203) Exchange Input [5]: [o_year#X, sum#X, isEmpty#X, sum#X, isEmpty#X] Arguments: hashpartitioning(o_year#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(190) HashAggregate +(204) HashAggregate Input [5]: [o_year#X, sum#X, isEmpty#X, sum#X, isEmpty#X] Keys [1]: [o_year#X] Functions [2]: [sum(CASE WHEN (nation#X = BRAZIL) THEN volume#X ELSE 0.0000 END), sum(volume#X)] Aggregate Attributes [2]: [sum(CASE WHEN (nation#X = BRAZIL) THEN volume#X ELSE 0.0000 END)#X, sum(volume#X)#X] Results [2]: [o_year#X, (sum(CASE WHEN (nation#X = BRAZIL) THEN volume#X ELSE 0.0000 END)#X / sum(volume#X)#X) AS mkt_share#X] -(191) Exchange +(205) Exchange Input [2]: [o_year#X, mkt_share#X] Arguments: rangepartitioning(o_year#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(192) Sort +(206) Sort Input [2]: [o_year#X, mkt_share#X] Arguments: [o_year#X ASC NULLS FIRST], true, 0 -(193) AdaptiveSparkPlan +(207) AdaptiveSparkPlan Output [2]: [o_year#X, mkt_share#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/9.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/9.txt index 634e3516a710..4b983de23fde 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/9.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1-ras/spark34/9.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (145) +AdaptiveSparkPlan (155) +- == Final Plan == VeloxColumnarToRowExec (106) +- ^ SortExecTransformer (104) @@ -83,44 +83,54 @@ AdaptiveSparkPlan (145) +- ^ NoopFilter (79) +- ^ Scan parquet (78) +- == Initial Plan == - Sort (144) - +- Exchange (143) - +- HashAggregate (142) - +- Exchange (141) - +- HashAggregate (140) - +- Project (139) - +- ShuffledHashJoin Inner BuildRight (138) - :- Exchange (134) - : +- Project (133) - : +- ShuffledHashJoin Inner BuildRight (132) - : :- Exchange (128) - : : +- Project (127) - : : +- ShuffledHashJoin Inner BuildRight (126) - : : :- Exchange (122) - : : : +- Project (121) - : : : +- ShuffledHashJoin Inner BuildRight (120) - : : : :- Exchange (116) - : : : : +- Project (115) - : : : : +- ShuffledHashJoin Inner BuildLeft (114) - : : : : :- Exchange (110) - : : : : : +- Project (109) - : : : : : +- Filter (108) - : : : : : +- Scan parquet (107) - : : : : +- Exchange (113) - : : : : +- Filter (112) - : : : : +- Scan parquet (111) - : : : +- Exchange (119) - : : : +- Filter (118) - : : : +- Scan parquet (117) - : : +- Exchange (125) - : : +- Filter (124) - : : +- Scan parquet (123) - : +- Exchange (131) - : +- Filter (130) - : +- Scan parquet (129) - +- Exchange (137) - +- Filter (136) - +- Scan parquet (135) + Sort (154) + +- Exchange (153) + +- HashAggregate (152) + +- Exchange (151) + +- HashAggregate (150) + +- Project (149) + +- SortMergeJoin Inner (148) + :- Sort (143) + : +- Exchange (142) + : +- Project (141) + : +- SortMergeJoin Inner (140) + : :- Sort (135) + : : +- Exchange (134) + : : +- Project (133) + : : +- SortMergeJoin Inner (132) + : : :- Sort (127) + : : : +- Exchange (126) + : : : +- Project (125) + : : : +- SortMergeJoin Inner (124) + : : : :- Sort (119) + : : : : +- Exchange (118) + : : : : +- Project (117) + : : : : +- SortMergeJoin Inner (116) + : : : : :- Sort (111) + : : : : : +- Exchange (110) + : : : : : +- Project (109) + : : : : : +- Filter (108) + : : : : : +- Scan parquet (107) + : : : : +- Sort (115) + : : : : +- Exchange (114) + : : : : +- Filter (113) + : : : : +- Scan parquet (112) + : : : +- Sort (123) + : : : +- Exchange (122) + : : : +- Filter (121) + : : : +- Scan parquet (120) + : : +- Sort (131) + : : +- Exchange (130) + : : +- Filter (129) + : : +- Scan parquet (128) + : +- Sort (139) + : +- Exchange (138) + : +- Filter (137) + : +- Scan parquet (136) + +- Sort (147) + +- Exchange (146) + +- Filter (145) + +- Scan parquet (144) (1) Scan parquet @@ -575,173 +585,213 @@ Input [2]: [p_partkey#X, p_name#X] Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(111) Scan parquet +(111) Sort +Input [1]: [p_partkey#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(112) Scan parquet Output [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_partkey), IsNotNull(l_suppkey), IsNotNull(l_orderkey)] ReadSchema: struct -(112) Filter +(113) Filter Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Condition : ((isnotnull(l_partkey#X) AND isnotnull(l_suppkey#X)) AND isnotnull(l_orderkey#X)) -(113) Exchange +(114) Exchange Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(114) ShuffledHashJoin +(115) Sort +Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(116) SortMergeJoin Left keys [1]: [p_partkey#X] Right keys [1]: [l_partkey#X] Join type: Inner Join condition: None -(115) Project +(117) Project Output [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Input [7]: [p_partkey#X, l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] -(116) Exchange +(118) Exchange Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(117) Scan parquet +(119) Sort +Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] +Arguments: [l_suppkey#X ASC NULLS FIRST], false, 0 + +(120) Scan parquet Output [2]: [s_suppkey#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct -(118) Filter +(121) Filter Input [2]: [s_suppkey#X, s_nationkey#X] Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) -(119) Exchange +(122) Exchange Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(120) ShuffledHashJoin +(123) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(124) SortMergeJoin Left keys [1]: [l_suppkey#X] Right keys [1]: [s_suppkey#X] Join type: Inner Join condition: None -(121) Project +(125) Project Output [7]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X] Input [8]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_suppkey#X, s_nationkey#X] -(122) Exchange +(126) Exchange Input [7]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X] Arguments: hashpartitioning(l_suppkey#X, l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(123) Scan parquet +(127) Sort +Input [7]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X] +Arguments: [l_suppkey#X ASC NULLS FIRST, l_partkey#X ASC NULLS FIRST], false, 0 + +(128) Scan parquet Output [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(ps_suppkey), IsNotNull(ps_partkey)] ReadSchema: struct -(124) Filter +(129) Filter Input [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] Condition : (isnotnull(ps_suppkey#X) AND isnotnull(ps_partkey#X)) -(125) Exchange +(130) Exchange Input [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] Arguments: hashpartitioning(ps_suppkey#X, ps_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(126) ShuffledHashJoin +(131) Sort +Input [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] +Arguments: [ps_suppkey#X ASC NULLS FIRST, ps_partkey#X ASC NULLS FIRST], false, 0 + +(132) SortMergeJoin Left keys [2]: [l_suppkey#X, l_partkey#X] Right keys [2]: [ps_suppkey#X, ps_partkey#X] Join type: Inner Join condition: None -(127) Project +(133) Project Output [6]: [l_orderkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X] Input [10]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_partkey#X, ps_suppkey#X, ps_supplycost#X] -(128) Exchange +(134) Exchange Input [6]: [l_orderkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(129) Scan parquet +(135) Sort +Input [6]: [l_orderkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(136) Scan parquet Output [2]: [o_orderkey#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderkey)] ReadSchema: struct -(130) Filter +(137) Filter Input [2]: [o_orderkey#X, o_orderdate#X] Condition : isnotnull(o_orderkey#X) -(131) Exchange +(138) Exchange Input [2]: [o_orderkey#X, o_orderdate#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(132) ShuffledHashJoin +(139) Sort +Input [2]: [o_orderkey#X, o_orderdate#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(140) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [o_orderkey#X] Join type: Inner Join condition: None -(133) Project +(141) Project Output [6]: [l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderdate#X] Input [8]: [l_orderkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderkey#X, o_orderdate#X] -(134) Exchange +(142) Exchange Input [6]: [l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderdate#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(135) Scan parquet +(143) Sort +Input [6]: [l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderdate#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(144) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey)] ReadSchema: struct -(136) Filter +(145) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : isnotnull(n_nationkey#X) -(137) Exchange +(146) Exchange Input [2]: [n_nationkey#X, n_name#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(138) ShuffledHashJoin +(147) Sort +Input [2]: [n_nationkey#X, n_name#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(148) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join type: Inner Join condition: None -(139) Project +(149) Project Output [3]: [n_name#X AS nation#X, year(o_orderdate#X) AS o_year#X, ((l_extendedprice#X * (1 - l_discount#X)) - (ps_supplycost#X * l_quantity#X)) AS amount#X] Input [8]: [l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderdate#X, n_nationkey#X, n_name#X] -(140) HashAggregate +(150) HashAggregate Input [3]: [nation#X, o_year#X, amount#X] Keys [2]: [nation#X, o_year#X] Functions [1]: [partial_sum(amount#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [4]: [nation#X, o_year#X, sum#X, isEmpty#X] -(141) Exchange +(151) Exchange Input [4]: [nation#X, o_year#X, sum#X, isEmpty#X] Arguments: hashpartitioning(nation#X, o_year#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(142) HashAggregate +(152) HashAggregate Input [4]: [nation#X, o_year#X, sum#X, isEmpty#X] Keys [2]: [nation#X, o_year#X] Functions [1]: [sum(amount#X)] Aggregate Attributes [1]: [sum(amount#X)#X] Results [3]: [nation#X, o_year#X, sum(amount#X)#X AS sum_profit#X] -(143) Exchange +(153) Exchange Input [3]: [nation#X, o_year#X, sum_profit#X] Arguments: rangepartitioning(nation#X ASC NULLS FIRST, o_year#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(144) Sort +(154) Sort Input [3]: [nation#X, o_year#X, sum_profit#X] Arguments: [nation#X ASC NULLS FIRST, o_year#X DESC NULLS LAST], true, 0 -(145) AdaptiveSparkPlan +(155) AdaptiveSparkPlan Output [3]: [nation#X, o_year#X, sum_profit#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/10.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/10.txt index c5fcd91867cb..993884df3f3a 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/10.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/10.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (94) +AdaptiveSparkPlan (100) +- == Final Plan == VeloxColumnarToRowExec (67) +- TakeOrderedAndProjectExecTransformer (66) @@ -54,32 +54,38 @@ AdaptiveSparkPlan (94) +- ^ FilterExecTransformer (45) +- ^ Scan parquet (44) +- == Initial Plan == - TakeOrderedAndProject (93) - +- HashAggregate (92) - +- Exchange (91) - +- HashAggregate (90) - +- Project (89) - +- ShuffledHashJoin Inner BuildRight (88) - :- Exchange (84) - : +- Project (83) - : +- ShuffledHashJoin Inner BuildRight (82) - : :- Exchange (77) - : : +- Project (76) - : : +- ShuffledHashJoin Inner BuildRight (75) - : : :- Exchange (70) - : : : +- Filter (69) - : : : +- Scan parquet (68) - : : +- Exchange (74) - : : +- Project (73) - : : +- Filter (72) - : : +- Scan parquet (71) - : +- Exchange (81) - : +- Project (80) - : +- Filter (79) - : +- Scan parquet (78) - +- Exchange (87) - +- Filter (86) - +- Scan parquet (85) + TakeOrderedAndProject (99) + +- HashAggregate (98) + +- Exchange (97) + +- HashAggregate (96) + +- Project (95) + +- SortMergeJoin Inner (94) + :- Sort (89) + : +- Exchange (88) + : +- Project (87) + : +- SortMergeJoin Inner (86) + : :- Sort (80) + : : +- Exchange (79) + : : +- Project (78) + : : +- SortMergeJoin Inner (77) + : : :- Sort (71) + : : : +- Exchange (70) + : : : +- Filter (69) + : : : +- Scan parquet (68) + : : +- Sort (76) + : : +- Exchange (75) + : : +- Project (74) + : : +- Filter (73) + : : +- Scan parquet (72) + : +- Sort (85) + : +- Exchange (84) + : +- Project (83) + : +- Filter (82) + : +- Scan parquet (81) + +- Sort (93) + +- Exchange (92) + +- Filter (91) + +- Scan parquet (90) (1) Scan parquet @@ -371,116 +377,140 @@ Condition : (isnotnull(c_custkey#X) AND isnotnull(c_nationkey#X)) Input [7]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(71) Scan parquet +(71) Sort +Input [7]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(72) Scan parquet Output [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderdate), GreaterThanOrEqual(o_orderdate,1993-10-01), LessThan(o_orderdate,1994-01-01), IsNotNull(o_custkey), IsNotNull(o_orderkey)] ReadSchema: struct -(72) Filter +(73) Filter Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Condition : ((((isnotnull(o_orderdate#X) AND (o_orderdate#X >= 1993-10-01)) AND (o_orderdate#X < 1994-01-01)) AND isnotnull(o_custkey#X)) AND isnotnull(o_orderkey#X)) -(73) Project +(74) Project Output [2]: [o_orderkey#X, o_custkey#X] Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] -(74) Exchange +(75) Exchange Input [2]: [o_orderkey#X, o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(75) ShuffledHashJoin +(76) Sort +Input [2]: [o_orderkey#X, o_custkey#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(77) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join condition: None -(76) Project +(78) Project Output [8]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X] Input [9]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X, o_custkey#X] -(77) Exchange +(79) Exchange Input [8]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(78) Scan parquet +(80) Sort +Input [8]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(81) Scan parquet Output [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_returnflag#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_returnflag), EqualTo(l_returnflag,R), IsNotNull(l_orderkey)] ReadSchema: struct -(79) Filter +(82) Filter Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_returnflag#X] Condition : ((isnotnull(l_returnflag#X) AND (l_returnflag#X = R)) AND isnotnull(l_orderkey#X)) -(80) Project +(83) Project Output [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_returnflag#X] -(81) Exchange +(84) Exchange Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(82) ShuffledHashJoin +(85) Sort +Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(86) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(83) Project +(87) Project Output [9]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X] Input [11]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X] -(84) Exchange +(88) Exchange Input [9]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(c_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(85) Scan parquet +(89) Sort +Input [9]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X] +Arguments: [c_nationkey#X ASC NULLS FIRST], false, 0 + +(90) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey)] ReadSchema: struct -(86) Filter +(91) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : isnotnull(n_nationkey#X) -(87) Exchange +(92) Exchange Input [2]: [n_nationkey#X, n_name#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(88) ShuffledHashJoin +(93) Sort +Input [2]: [n_nationkey#X, n_name#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(94) SortMergeJoin Left keys [1]: [c_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(89) Project +(95) Project Output [9]: [c_custkey#X, c_name#X, c_address#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X, n_name#X] Input [11]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X, n_nationkey#X, n_name#X] -(90) HashAggregate +(96) HashAggregate Input [9]: [c_custkey#X, c_name#X, c_address#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X, n_name#X] Keys [7]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [9]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, sum#X, isEmpty#X] -(91) Exchange +(97) Exchange Input [9]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, sum#X, isEmpty#X] Arguments: hashpartitioning(c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(92) HashAggregate +(98) HashAggregate Input [9]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, sum#X, isEmpty#X] Keys [7]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X] Results [8]: [c_custkey#X, c_name#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X AS revenue#X, c_acctbal#X, n_name#X, c_address#X, c_phone#X, c_comment#X] -(93) TakeOrderedAndProject +(99) TakeOrderedAndProject Input [8]: [c_custkey#X, c_name#X, revenue#X, c_acctbal#X, n_name#X, c_address#X, c_phone#X, c_comment#X] Arguments: X, [revenue#X DESC NULLS LAST], [c_custkey#X, c_name#X, revenue#X, c_acctbal#X, n_name#X, c_address#X, c_phone#X, c_comment#X] -(94) AdaptiveSparkPlan +(100) AdaptiveSparkPlan Output [8]: [c_custkey#X, c_name#X, revenue#X, c_acctbal#X, n_name#X, c_address#X, c_phone#X, c_comment#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/11.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/11.txt index 59de06707aad..8142375d9ead 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/11.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/11.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (78) +AdaptiveSparkPlan (82) +- == Final Plan == VeloxColumnarToRowExec (56) +- ^ SortExecTransformer (54) @@ -45,27 +45,31 @@ AdaptiveSparkPlan (78) +- ^ FilterExecTransformer (28) +- ^ Scan parquet (27) +- == Initial Plan == - Sort (77) - +- Exchange (76) - +- Filter (75) - +- HashAggregate (74) - +- Exchange (73) - +- HashAggregate (72) - +- Project (71) - +- ShuffledHashJoin Inner BuildRight (70) - :- Exchange (65) - : +- Project (64) - : +- ShuffledHashJoin Inner BuildRight (63) - : :- Exchange (59) - : : +- Filter (58) - : : +- Scan parquet (57) - : +- Exchange (62) - : +- Filter (61) - : +- Scan parquet (60) - +- Exchange (69) - +- Project (68) - +- Filter (67) - +- Scan parquet (66) + Sort (81) + +- Exchange (80) + +- Filter (79) + +- HashAggregate (78) + +- Exchange (77) + +- HashAggregate (76) + +- Project (75) + +- SortMergeJoin Inner (74) + :- Sort (68) + : +- Exchange (67) + : +- Project (66) + : +- SortMergeJoin Inner (65) + : :- Sort (60) + : : +- Exchange (59) + : : +- Filter (58) + : : +- Scan parquet (57) + : +- Sort (64) + : +- Exchange (63) + : +- Filter (62) + : +- Scan parquet (61) + +- Sort (73) + +- Exchange (72) + +- Project (71) + +- Filter (70) + +- Scan parquet (69) (1) Scan parquet @@ -311,92 +315,108 @@ Condition : isnotnull(ps_suppkey#X) Input [4]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(60) Scan parquet +(60) Sort +Input [4]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] +Arguments: [ps_suppkey#X ASC NULLS FIRST], false, 0 + +(61) Scan parquet Output [2]: [s_suppkey#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct -(61) Filter +(62) Filter Input [2]: [s_suppkey#X, s_nationkey#X] Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) -(62) Exchange +(63) Exchange Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(63) ShuffledHashJoin +(64) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(65) SortMergeJoin Left keys [1]: [ps_suppkey#X] Right keys [1]: [s_suppkey#X] Join condition: None -(64) Project +(66) Project Output [4]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] Input [6]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X, s_suppkey#X, s_nationkey#X] -(65) Exchange +(67) Exchange Input [4]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(66) Scan parquet +(68) Sort +Input [4]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(69) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,GERMANY), IsNotNull(n_nationkey)] ReadSchema: struct -(67) Filter +(70) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = GERMANY)) AND isnotnull(n_nationkey#X)) -(68) Project +(71) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(69) Exchange +(72) Exchange Input [1]: [n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(70) ShuffledHashJoin +(73) Sort +Input [1]: [n_nationkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(74) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(71) Project +(75) Project Output [3]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X] Input [5]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X, n_nationkey#X] -(72) HashAggregate +(76) HashAggregate Input [3]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X] Keys [1]: [ps_partkey#X] Functions [1]: [partial_sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(cast(ps_availqty#X as decimal(10,0)) as decimal(12,2)))), DecimalType(23,2), true))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [ps_partkey#X, sum#X, isEmpty#X] -(73) Exchange +(77) Exchange Input [3]: [ps_partkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(ps_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(74) HashAggregate +(78) HashAggregate Input [3]: [ps_partkey#X, sum#X, isEmpty#X] Keys [1]: [ps_partkey#X] Functions [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(cast(ps_availqty#X as decimal(10,0)) as decimal(12,2)))), DecimalType(23,2), true))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(cast(ps_availqty#X as decimal(10,0)) as decimal(12,2)))), DecimalType(23,2), true))#X] Results [2]: [ps_partkey#X, sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(cast(ps_availqty#X as decimal(10,0)) as decimal(12,2)))), DecimalType(23,2), true))#X AS value#X] -(75) Filter +(79) Filter Input [2]: [ps_partkey#X, value#X] Condition : (isnotnull(value#X) AND (cast(value#X as decimal(38,6)) > Subquery subquery#X, [id=#X])) -(76) Exchange +(80) Exchange Input [2]: [ps_partkey#X, value#X] Arguments: rangepartitioning(value#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(77) Sort +(81) Sort Input [2]: [ps_partkey#X, value#X] Arguments: [value#X DESC NULLS LAST], true, 0 -(78) AdaptiveSparkPlan +(82) AdaptiveSparkPlan Output [2]: [ps_partkey#X, value#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/12.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/12.txt index a8ac5d0d2c1b..802a79759235 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/12.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/12.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (53) +AdaptiveSparkPlan (55) +- == Final Plan == VeloxColumnarToRowExec (38) +- ^ SortExecTransformer (36) @@ -31,20 +31,22 @@ AdaptiveSparkPlan (53) +- ^ FilterExecTransformer (11) +- ^ Scan parquet (10) +- == Initial Plan == - Sort (52) - +- Exchange (51) - +- HashAggregate (50) - +- Exchange (49) - +- HashAggregate (48) - +- Project (47) - +- ShuffledHashJoin Inner BuildLeft (46) - :- Exchange (41) - : +- Filter (40) - : +- Scan parquet (39) - +- Exchange (45) - +- Project (44) - +- Filter (43) - +- Scan parquet (42) + Sort (54) + +- Exchange (53) + +- HashAggregate (52) + +- Exchange (51) + +- HashAggregate (50) + +- Project (49) + +- SortMergeJoin Inner (48) + :- Sort (42) + : +- Exchange (41) + : +- Filter (40) + : +- Scan parquet (39) + +- Sort (47) + +- Exchange (46) + +- Project (45) + +- Filter (44) + +- Scan parquet (43) (1) Scan parquet @@ -218,60 +220,68 @@ Condition : isnotnull(o_orderkey#X) Input [2]: [o_orderkey#X, o_orderpriority#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(42) Scan parquet +(42) Sort +Input [2]: [o_orderkey#X, o_orderpriority#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(43) Scan parquet Output [5]: [l_orderkey#X, l_shipdate#X, l_commitdate#X, l_receiptdate#X, l_shipmode#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_commitdate), IsNotNull(l_receiptdate), IsNotNull(l_shipdate), In(l_shipmode, [MAIL,SHIP]), GreaterThanOrEqual(l_receiptdate,1994-01-01), LessThan(l_receiptdate,1995-01-01), IsNotNull(l_orderkey)] ReadSchema: struct -(43) Filter +(44) Filter Input [5]: [l_orderkey#X, l_shipdate#X, l_commitdate#X, l_receiptdate#X, l_shipmode#X] Condition : ((((((((isnotnull(l_commitdate#X) AND isnotnull(l_receiptdate#X)) AND isnotnull(l_shipdate#X)) AND l_shipmode#X IN (MAIL,SHIP)) AND (l_commitdate#X < l_receiptdate#X)) AND (l_shipdate#X < l_commitdate#X)) AND (l_receiptdate#X >= 1994-01-01)) AND (l_receiptdate#X < 1995-01-01)) AND isnotnull(l_orderkey#X)) -(44) Project +(45) Project Output [2]: [l_orderkey#X, l_shipmode#X] Input [5]: [l_orderkey#X, l_shipdate#X, l_commitdate#X, l_receiptdate#X, l_shipmode#X] -(45) Exchange +(46) Exchange Input [2]: [l_orderkey#X, l_shipmode#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(46) ShuffledHashJoin +(47) Sort +Input [2]: [l_orderkey#X, l_shipmode#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(48) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(47) Project +(49) Project Output [2]: [o_orderpriority#X, l_shipmode#X] Input [4]: [o_orderkey#X, o_orderpriority#X, l_orderkey#X, l_shipmode#X] -(48) HashAggregate +(50) HashAggregate Input [2]: [o_orderpriority#X, l_shipmode#X] Keys [1]: [l_shipmode#X] Functions [2]: [partial_sum(CASE WHEN ((o_orderpriority#X = 1-URGENT) OR (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (NOT (o_orderpriority#X = 1-URGENT) AND NOT (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)] Aggregate Attributes [2]: [sum#X, sum#X] Results [3]: [l_shipmode#X, sum#X, sum#X] -(49) Exchange +(51) Exchange Input [3]: [l_shipmode#X, sum#X, sum#X] Arguments: hashpartitioning(l_shipmode#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(50) HashAggregate +(52) HashAggregate Input [3]: [l_shipmode#X, sum#X, sum#X] Keys [1]: [l_shipmode#X] Functions [2]: [sum(CASE WHEN ((o_orderpriority#X = 1-URGENT) OR (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END), sum(CASE WHEN (NOT (o_orderpriority#X = 1-URGENT) AND NOT (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)] Aggregate Attributes [2]: [sum(CASE WHEN ((o_orderpriority#X = 1-URGENT) OR (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)#X, sum(CASE WHEN (NOT (o_orderpriority#X = 1-URGENT) AND NOT (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)#X] Results [3]: [l_shipmode#X, sum(CASE WHEN ((o_orderpriority#X = 1-URGENT) OR (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)#X AS high_line_count#X, sum(CASE WHEN (NOT (o_orderpriority#X = 1-URGENT) AND NOT (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)#X AS low_line_count#X] -(51) Exchange +(53) Exchange Input [3]: [l_shipmode#X, high_line_count#X, low_line_count#X] Arguments: rangepartitioning(l_shipmode#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(52) Sort +(54) Sort Input [3]: [l_shipmode#X, high_line_count#X, low_line_count#X] Arguments: [l_shipmode#X ASC NULLS FIRST], true, 0 -(53) AdaptiveSparkPlan +(55) AdaptiveSparkPlan Output [3]: [l_shipmode#X, high_line_count#X, low_line_count#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/13.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/13.txt index d65867ecf822..b9bf0f1fad60 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/13.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/13.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (56) +AdaptiveSparkPlan (58) +- == Final Plan == VeloxColumnarToRowExec (40) +- ^ SortExecTransformer (38) @@ -33,21 +33,23 @@ AdaptiveSparkPlan (56) +- ^ FilterExecTransformer (10) +- ^ Scan parquet (9) +- == Initial Plan == - Sort (55) - +- Exchange (54) - +- HashAggregate (53) - +- Exchange (52) - +- HashAggregate (51) - +- HashAggregate (50) - +- HashAggregate (49) - +- Project (48) - +- ShuffledHashJoin LeftOuter BuildRight (47) - :- Exchange (42) - : +- Scan parquet (41) - +- Exchange (46) - +- Project (45) - +- Filter (44) - +- Scan parquet (43) + Sort (57) + +- Exchange (56) + +- HashAggregate (55) + +- Exchange (54) + +- HashAggregate (53) + +- HashAggregate (52) + +- HashAggregate (51) + +- Project (50) + +- SortMergeJoin LeftOuter (49) + :- Sort (43) + : +- Exchange (42) + : +- Scan parquet (41) + +- Sort (48) + +- Exchange (47) + +- Project (46) + +- Filter (45) + +- Scan parquet (44) (1) Scan parquet @@ -229,74 +231,82 @@ ReadSchema: struct Input [1]: [c_custkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(43) Scan parquet +(43) Sort +Input [1]: [c_custkey#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(44) Scan parquet Output [3]: [o_orderkey#X, o_custkey#X, o_comment#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_comment), IsNotNull(o_custkey)] ReadSchema: struct -(44) Filter +(45) Filter Input [3]: [o_orderkey#X, o_custkey#X, o_comment#X] Condition : ((isnotnull(o_comment#X) AND NOT o_comment#X LIKE %special%requests%) AND isnotnull(o_custkey#X)) -(45) Project +(46) Project Output [2]: [o_orderkey#X, o_custkey#X] Input [3]: [o_orderkey#X, o_custkey#X, o_comment#X] -(46) Exchange +(47) Exchange Input [2]: [o_orderkey#X, o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(47) ShuffledHashJoin +(48) Sort +Input [2]: [o_orderkey#X, o_custkey#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(49) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join condition: None -(48) Project +(50) Project Output [2]: [c_custkey#X, o_orderkey#X] Input [3]: [c_custkey#X, o_orderkey#X, o_custkey#X] -(49) HashAggregate +(51) HashAggregate Input [2]: [c_custkey#X, o_orderkey#X] Keys [1]: [c_custkey#X] Functions [1]: [partial_count(o_orderkey#X)] Aggregate Attributes [1]: [count#X] Results [2]: [c_custkey#X, count#X] -(50) HashAggregate +(52) HashAggregate Input [2]: [c_custkey#X, count#X] Keys [1]: [c_custkey#X] Functions [1]: [count(o_orderkey#X)] Aggregate Attributes [1]: [count(o_orderkey#X)#X] Results [1]: [count(o_orderkey#X)#X AS c_count#X] -(51) HashAggregate +(53) HashAggregate Input [1]: [c_count#X] Keys [1]: [c_count#X] Functions [1]: [partial_count(1)] Aggregate Attributes [1]: [count#X] Results [2]: [c_count#X, count#X] -(52) Exchange +(54) Exchange Input [2]: [c_count#X, count#X] Arguments: hashpartitioning(c_count#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(53) HashAggregate +(55) HashAggregate Input [2]: [c_count#X, count#X] Keys [1]: [c_count#X] Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#X] Results [2]: [c_count#X, count(1)#X AS custdist#X] -(54) Exchange +(56) Exchange Input [2]: [c_count#X, custdist#X] Arguments: rangepartitioning(custdist#X DESC NULLS LAST, c_count#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(55) Sort +(57) Sort Input [2]: [c_count#X, custdist#X] Arguments: [custdist#X DESC NULLS LAST, c_count#X DESC NULLS LAST], true, 0 -(56) AdaptiveSparkPlan +(58) AdaptiveSparkPlan Output [2]: [c_count#X, custdist#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/14.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/14.txt index 2bc0be8fcb67..425c55f5a4ce 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/14.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/14.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (37) +AdaptiveSparkPlan (39) +- == Final Plan == VeloxColumnarToRowExec (25) +- ^ ProjectExecTransformer (23) @@ -22,17 +22,19 @@ AdaptiveSparkPlan (37) +- ^ FilterExecTransformer (11) +- ^ Scan parquet (10) +- == Initial Plan == - HashAggregate (36) - +- HashAggregate (35) - +- Project (34) - +- ShuffledHashJoin Inner BuildRight (33) - :- Exchange (29) - : +- Project (28) - : +- Filter (27) - : +- Scan parquet (26) - +- Exchange (32) - +- Filter (31) - +- Scan parquet (30) + HashAggregate (38) + +- HashAggregate (37) + +- Project (36) + +- SortMergeJoin Inner (35) + :- Sort (30) + : +- Exchange (29) + : +- Project (28) + : +- Filter (27) + : +- Scan parquet (26) + +- Sort (34) + +- Exchange (33) + +- Filter (32) + +- Scan parquet (31) (1) Scan parquet @@ -162,44 +164,52 @@ Input [4]: [l_partkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Input [3]: [l_partkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(30) Scan parquet +(30) Sort +Input [3]: [l_partkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(31) Scan parquet Output [2]: [p_partkey#X, p_type#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_partkey)] ReadSchema: struct -(31) Filter +(32) Filter Input [2]: [p_partkey#X, p_type#X] Condition : isnotnull(p_partkey#X) -(32) Exchange +(33) Exchange Input [2]: [p_partkey#X, p_type#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(33) ShuffledHashJoin +(34) Sort +Input [2]: [p_partkey#X, p_type#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(35) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(34) Project +(36) Project Output [3]: [l_extendedprice#X, l_discount#X, p_type#X] Input [5]: [l_partkey#X, l_extendedprice#X, l_discount#X, p_partkey#X, p_type#X] -(35) HashAggregate +(37) HashAggregate Input [3]: [l_extendedprice#X, l_discount#X, p_type#X] Keys: [] Functions [2]: [partial_sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true) ELSE 0.0000 END), partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] Aggregate Attributes [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] Results [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] -(36) HashAggregate +(38) HashAggregate Input [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] Keys: [] Functions [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true) ELSE 0.0000 END), sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] Aggregate Attributes [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true) ELSE 0.0000 END)#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X] Results [1]: [CheckOverflow((promote_precision(CheckOverflow((100.0000 * promote_precision(sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true) ELSE 0.0000 END)#X)), DecimalType(38,6), true)) / promote_precision(cast(sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X as decimal(38,6)))), DecimalType(38,6), true) AS promo_revenue#X] -(37) AdaptiveSparkPlan +(39) AdaptiveSparkPlan Output [1]: [promo_revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/15.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/15.txt index 0d21930825c7..f003eed994d0 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/15.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/15.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (48) +AdaptiveSparkPlan (50) +- == Final Plan == VeloxColumnarToRowExec (33) +- ^ SortExecTransformer (31) @@ -28,20 +28,22 @@ AdaptiveSparkPlan (48) +- ^ FilterExecTransformer (11) +- ^ Scan parquet (10) +- == Initial Plan == - Sort (47) - +- Exchange (46) - +- Project (45) - +- ShuffledHashJoin Inner BuildLeft (44) - :- Exchange (36) - : +- Filter (35) - : +- Scan parquet (34) - +- Filter (43) - +- HashAggregate (42) - +- Exchange (41) - +- HashAggregate (40) - +- Project (39) - +- Filter (38) - +- Scan parquet (37) + Sort (49) + +- Exchange (48) + +- Project (47) + +- SortMergeJoin Inner (46) + :- Sort (37) + : +- Exchange (36) + : +- Filter (35) + : +- Scan parquet (34) + +- Sort (45) + +- Filter (44) + +- HashAggregate (43) + +- Exchange (42) + +- HashAggregate (41) + +- Project (40) + +- Filter (39) + +- Scan parquet (38) (1) Scan parquet @@ -197,60 +199,68 @@ Condition : isnotnull(s_suppkey#X) Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(37) Scan parquet +(37) Sort +Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(38) Scan parquet Output [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1996-01-01), LessThan(l_shipdate,1996-04-01), IsNotNull(l_suppkey)] ReadSchema: struct -(38) Filter +(39) Filter Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : (((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1996-01-01)) AND (l_shipdate#X < 1996-04-01)) AND isnotnull(l_suppkey#X)) -(39) Project +(40) Project Output [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(40) HashAggregate +(41) HashAggregate Input [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] Keys [1]: [l_suppkey#X] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_suppkey#X, sum#X, isEmpty#X] -(41) Exchange +(42) Exchange Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(42) HashAggregate +(43) HashAggregate Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Keys [1]: [l_suppkey#X] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X] Results [2]: [l_suppkey#X AS supplier_no#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X AS total_revenue#X] -(43) Filter +(44) Filter Input [2]: [supplier_no#X, total_revenue#X] Condition : (isnotnull(total_revenue#X) AND (total_revenue#X = Subquery subquery#X, [id=#X])) -(44) ShuffledHashJoin +(45) Sort +Input [2]: [supplier_no#X, total_revenue#X] +Arguments: [supplier_no#X ASC NULLS FIRST], false, 0 + +(46) SortMergeJoin Left keys [1]: [s_suppkey#X] Right keys [1]: [supplier_no#X] Join condition: None -(45) Project +(47) Project Output [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] Input [6]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, supplier_no#X, total_revenue#X] -(46) Exchange +(48) Exchange Input [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] Arguments: rangepartitioning(s_suppkey#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(47) Sort +(49) Sort Input [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] Arguments: [s_suppkey#X ASC NULLS FIRST], true, 0 -(48) AdaptiveSparkPlan +(50) AdaptiveSparkPlan Output [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/16.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/16.txt index cd3e53ad7bd6..c9374b01ff02 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/16.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/16.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (69) +AdaptiveSparkPlan (71) +- == Final Plan == VeloxColumnarToRowExec (47) +- ^ SortExecTransformer (45) @@ -38,27 +38,29 @@ AdaptiveSparkPlan (69) +- ^ FilterExecTransformer (11) +- ^ Scan parquet (10) +- == Initial Plan == - Sort (68) - +- Exchange (67) - +- HashAggregate (66) - +- Exchange (65) - +- HashAggregate (64) - +- HashAggregate (63) - +- Exchange (62) - +- HashAggregate (61) - +- Project (60) - +- ShuffledHashJoin Inner BuildRight (59) - :- Exchange (55) - : +- BroadcastHashJoin LeftAnti BuildRight (54) - : :- Filter (49) - : : +- Scan parquet (48) - : +- BroadcastExchange (53) - : +- Project (52) - : +- Filter (51) - : +- Scan parquet (50) - +- Exchange (58) - +- Filter (57) - +- Scan parquet (56) + Sort (70) + +- Exchange (69) + +- HashAggregate (68) + +- Exchange (67) + +- HashAggregate (66) + +- HashAggregate (65) + +- Exchange (64) + +- HashAggregate (63) + +- Project (62) + +- SortMergeJoin Inner (61) + :- Sort (56) + : +- Exchange (55) + : +- BroadcastHashJoin LeftAnti BuildRight (54) + : :- Filter (49) + : : +- Scan parquet (48) + : +- BroadcastExchange (53) + : +- Project (52) + : +- Filter (51) + : +- Scan parquet (50) + +- Sort (60) + +- Exchange (59) + +- Filter (58) + +- Scan parquet (57) (1) Scan parquet @@ -296,74 +298,82 @@ Join condition: None Input [2]: [ps_partkey#X, ps_suppkey#X] Arguments: hashpartitioning(ps_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(56) Scan parquet +(56) Sort +Input [2]: [ps_partkey#X, ps_suppkey#X] +Arguments: [ps_partkey#X ASC NULLS FIRST], false, 0 + +(57) Scan parquet Output [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_brand), IsNotNull(p_type), Not(EqualTo(p_brand,Brand#X)), Not(StringStartsWith(p_type,MEDIUM POLISHED)), In(p_size, [14,19,23,3,36,45,49,9]), IsNotNull(p_partkey)] ReadSchema: struct -(57) Filter +(58) Filter Input [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] Condition : (((((isnotnull(p_brand#X) AND isnotnull(p_type#X)) AND NOT (p_brand#X = Brand#X)) AND NOT StartsWith(p_type#X, MEDIUM POLISHED)) AND p_size#X IN (49,14,23,45,19,3,36,9)) AND isnotnull(p_partkey#X)) -(58) Exchange +(59) Exchange Input [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(59) ShuffledHashJoin +(60) Sort +Input [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(61) SortMergeJoin Left keys [1]: [ps_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(60) Project +(62) Project Output [4]: [ps_suppkey#X, p_brand#X, p_type#X, p_size#X] Input [6]: [ps_partkey#X, ps_suppkey#X, p_partkey#X, p_brand#X, p_type#X, p_size#X] -(61) HashAggregate +(63) HashAggregate Input [4]: [ps_suppkey#X, p_brand#X, p_type#X, p_size#X] Keys [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] Functions: [] Aggregate Attributes: [] Results [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] -(62) Exchange +(64) Exchange Input [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] Arguments: hashpartitioning(p_brand#X, p_type#X, p_size#X, ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(63) HashAggregate +(65) HashAggregate Input [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] Keys [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] Functions: [] Aggregate Attributes: [] Results [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] -(64) HashAggregate +(66) HashAggregate Input [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] Keys [3]: [p_brand#X, p_type#X, p_size#X] Functions [1]: [partial_count(distinct ps_suppkey#X)] Aggregate Attributes [1]: [count(ps_suppkey#X)#X] Results [4]: [p_brand#X, p_type#X, p_size#X, count#X] -(65) Exchange +(67) Exchange Input [4]: [p_brand#X, p_type#X, p_size#X, count#X] Arguments: hashpartitioning(p_brand#X, p_type#X, p_size#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(66) HashAggregate +(68) HashAggregate Input [4]: [p_brand#X, p_type#X, p_size#X, count#X] Keys [3]: [p_brand#X, p_type#X, p_size#X] Functions [1]: [count(distinct ps_suppkey#X)] Aggregate Attributes [1]: [count(ps_suppkey#X)#X] Results [4]: [p_brand#X, p_type#X, p_size#X, count(ps_suppkey#X)#X AS supplier_cnt#X] -(67) Exchange +(69) Exchange Input [4]: [p_brand#X, p_type#X, p_size#X, supplier_cnt#X] Arguments: rangepartitioning(supplier_cnt#X DESC NULLS LAST, p_brand#X ASC NULLS FIRST, p_type#X ASC NULLS FIRST, p_size#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(68) Sort +(70) Sort Input [4]: [p_brand#X, p_type#X, p_size#X, supplier_cnt#X] Arguments: [supplier_cnt#X DESC NULLS LAST, p_brand#X ASC NULLS FIRST, p_type#X ASC NULLS FIRST, p_size#X ASC NULLS FIRST], true, 0 -(69) AdaptiveSparkPlan +(71) AdaptiveSparkPlan Output [4]: [p_brand#X, p_type#X, p_size#X, supplier_cnt#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/17.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/17.txt index fc17c87d7df0..69f50fa16ab0 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/17.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/17.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (60) +AdaptiveSparkPlan (63) +- == Final Plan == VeloxColumnarToRowExec (40) +- ^ ProjectExecTransformer (38) @@ -35,25 +35,28 @@ AdaptiveSparkPlan (60) +- ^ FilterExecTransformer (22) +- ^ Scan parquet (21) +- == Initial Plan == - HashAggregate (59) - +- HashAggregate (58) - +- Project (57) - +- ShuffledHashJoin Inner BuildRight (56) - :- Project (49) - : +- ShuffledHashJoin Inner BuildRight (48) - : :- Exchange (43) - : : +- Filter (42) - : : +- Scan parquet (41) - : +- Exchange (47) - : +- Project (46) - : +- Filter (45) - : +- Scan parquet (44) - +- Filter (55) - +- HashAggregate (54) - +- Exchange (53) - +- HashAggregate (52) - +- Filter (51) - +- Scan parquet (50) + HashAggregate (62) + +- HashAggregate (61) + +- Project (60) + +- SortMergeJoin Inner (59) + :- Project (51) + : +- SortMergeJoin Inner (50) + : :- Sort (44) + : : +- Exchange (43) + : : +- Filter (42) + : : +- Scan parquet (41) + : +- Sort (49) + : +- Exchange (48) + : +- Project (47) + : +- Filter (46) + : +- Scan parquet (45) + +- Sort (58) + +- Filter (57) + +- HashAggregate (56) + +- Exchange (55) + +- HashAggregate (54) + +- Filter (53) + +- Scan parquet (52) (1) Scan parquet @@ -247,90 +250,102 @@ Condition : (isnotnull(l_partkey#X) AND isnotnull(l_quantity#X)) Input [3]: [l_partkey#X, l_quantity#X, l_extendedprice#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(44) Scan parquet +(44) Sort +Input [3]: [l_partkey#X, l_quantity#X, l_extendedprice#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(45) Scan parquet Output [3]: [p_partkey#X, p_brand#X, p_container#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_brand), IsNotNull(p_container), EqualTo(p_brand,Brand#X), EqualTo(p_container,MED BOX), IsNotNull(p_partkey)] ReadSchema: struct -(45) Filter +(46) Filter Input [3]: [p_partkey#X, p_brand#X, p_container#X] Condition : ((((isnotnull(p_brand#X) AND isnotnull(p_container#X)) AND (p_brand#X = Brand#X)) AND (p_container#X = MED BOX)) AND isnotnull(p_partkey#X)) -(46) Project +(47) Project Output [1]: [p_partkey#X] Input [3]: [p_partkey#X, p_brand#X, p_container#X] -(47) Exchange +(48) Exchange Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(48) ShuffledHashJoin +(49) Sort +Input [1]: [p_partkey#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(50) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(49) Project +(51) Project Output [3]: [l_quantity#X, l_extendedprice#X, p_partkey#X] Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, p_partkey#X] -(50) Scan parquet +(52) Scan parquet Output [2]: [l_partkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_partkey)] ReadSchema: struct -(51) Filter +(53) Filter Input [2]: [l_partkey#X, l_quantity#X] Condition : isnotnull(l_partkey#X) -(52) HashAggregate +(54) HashAggregate Input [2]: [l_partkey#X, l_quantity#X] Keys [1]: [l_partkey#X] Functions [1]: [partial_avg(l_quantity#X)] Aggregate Attributes [2]: [sum#X, count#X] Results [3]: [l_partkey#X, sum#X, count#X] -(53) Exchange +(55) Exchange Input [3]: [l_partkey#X, sum#X, count#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(54) HashAggregate +(56) HashAggregate Input [3]: [l_partkey#X, sum#X, count#X] Keys [1]: [l_partkey#X] Functions [1]: [avg(l_quantity#X)] Aggregate Attributes [1]: [avg(l_quantity#X)#X] Results [2]: [CheckOverflow((0.200000 * promote_precision(avg(l_quantity#X)#X)), DecimalType(18,7), true) AS (0.2 * avg(l_quantity))#X, l_partkey#X] -(55) Filter +(57) Filter Input [2]: [(0.2 * avg(l_quantity))#X, l_partkey#X] Condition : isnotnull((0.2 * avg(l_quantity))#X) -(56) ShuffledHashJoin +(58) Sort +Input [2]: [(0.2 * avg(l_quantity))#X, l_partkey#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(59) SortMergeJoin Left keys [1]: [p_partkey#X] Right keys [1]: [l_partkey#X] Join condition: (cast(l_quantity#X as decimal(18,7)) < (0.2 * avg(l_quantity))#X) -(57) Project +(60) Project Output [1]: [l_extendedprice#X] Input [5]: [l_quantity#X, l_extendedprice#X, p_partkey#X, (0.2 * avg(l_quantity))#X, l_partkey#X] -(58) HashAggregate +(61) HashAggregate Input [1]: [l_extendedprice#X] Keys: [] Functions [1]: [partial_sum(l_extendedprice#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [2]: [sum#X, isEmpty#X] -(59) HashAggregate +(62) HashAggregate Input [2]: [sum#X, isEmpty#X] Keys: [] Functions [1]: [sum(l_extendedprice#X)] Aggregate Attributes [1]: [sum(l_extendedprice#X)#X] Results [1]: [CheckOverflow((promote_precision(sum(l_extendedprice#X)#X) / 7.00), DecimalType(27,6), true) AS avg_yearly#X] -(60) AdaptiveSparkPlan +(63) AdaptiveSparkPlan Output [1]: [avg_yearly#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/18.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/18.txt index fc65f4b52897..96fa1cd82606 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/18.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/18.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (103) +AdaptiveSparkPlan (110) +- == Final Plan == VeloxColumnarToRowExec (70) +- TakeOrderedAndProjectExecTransformer (69) @@ -58,38 +58,45 @@ AdaptiveSparkPlan (103) +- ShuffleQueryStage (57) +- ReusedExchange (56) +- == Initial Plan == - TakeOrderedAndProject (102) - +- HashAggregate (101) - +- HashAggregate (100) - +- Project (99) - +- ShuffledHashJoin Inner BuildRight (98) - :- Exchange (87) - : +- Project (86) - : +- ShuffledHashJoin Inner BuildLeft (85) - : :- Exchange (73) - : : +- Filter (72) - : : +- Scan parquet (71) - : +- Exchange (84) - : +- ShuffledHashJoin LeftSemi BuildRight (83) - : :- Exchange (76) - : : +- Filter (75) - : : +- Scan parquet (74) - : +- Project (82) - : +- Filter (81) - : +- HashAggregate (80) - : +- Exchange (79) - : +- HashAggregate (78) - : +- Scan parquet (77) - +- ShuffledHashJoin LeftSemi BuildRight (97) - :- Exchange (90) - : +- Filter (89) - : +- Scan parquet (88) - +- Project (96) - +- Filter (95) - +- HashAggregate (94) - +- Exchange (93) - +- HashAggregate (92) - +- Scan parquet (91) + TakeOrderedAndProject (109) + +- HashAggregate (108) + +- HashAggregate (107) + +- Project (106) + +- SortMergeJoin Inner (105) + :- Sort (92) + : +- Exchange (91) + : +- Project (90) + : +- SortMergeJoin Inner (89) + : :- Sort (74) + : : +- Exchange (73) + : : +- Filter (72) + : : +- Scan parquet (71) + : +- Sort (88) + : +- Exchange (87) + : +- SortMergeJoin LeftSemi (86) + : :- Sort (78) + : : +- Exchange (77) + : : +- Filter (76) + : : +- Scan parquet (75) + : +- Sort (85) + : +- Project (84) + : +- Filter (83) + : +- HashAggregate (82) + : +- Exchange (81) + : +- HashAggregate (80) + : +- Scan parquet (79) + +- SortMergeJoin LeftSemi (104) + :- Sort (96) + : +- Exchange (95) + : +- Filter (94) + : +- Scan parquet (93) + +- Sort (103) + +- Project (102) + +- Filter (101) + +- HashAggregate (100) + +- Exchange (99) + +- HashAggregate (98) + +- Scan parquet (97) (1) Scan parquet @@ -401,154 +408,182 @@ Condition : isnotnull(c_custkey#X) Input [2]: [c_custkey#X, c_name#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(74) Scan parquet +(74) Sort +Input [2]: [c_custkey#X, c_name#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(75) Scan parquet Output [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_custkey), IsNotNull(o_orderkey)] ReadSchema: struct -(75) Filter +(76) Filter Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Condition : (isnotnull(o_custkey#X) AND isnotnull(o_orderkey#X)) -(76) Exchange +(77) Exchange Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(77) Scan parquet +(78) Sort +Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(79) Scan parquet Output [2]: [l_orderkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(78) HashAggregate +(80) HashAggregate Input [2]: [l_orderkey#X, l_quantity#X] Keys [1]: [l_orderkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_orderkey#X, sum#X, isEmpty#X] -(79) Exchange +(81) Exchange Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(80) HashAggregate +(82) HashAggregate Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Keys [1]: [l_orderkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [2]: [l_orderkey#X, sum(l_quantity#X)#X AS sum(l_quantity#X)#X] -(81) Filter +(83) Filter Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] Condition : (isnotnull(sum(l_quantity#X)#X) AND (sum(l_quantity#X)#X > 300.00)) -(82) Project +(84) Project Output [1]: [l_orderkey#X] Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] -(83) ShuffledHashJoin +(85) Sort +Input [1]: [l_orderkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(86) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(84) Exchange +(87) Exchange Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(85) ShuffledHashJoin +(88) Sort +Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(89) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join condition: None -(86) Project +(90) Project Output [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] Input [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] -(87) Exchange +(91) Exchange Input [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(88) Scan parquet +(92) Sort +Input [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(93) Scan parquet Output [2]: [l_orderkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_orderkey)] ReadSchema: struct -(89) Filter +(94) Filter Input [2]: [l_orderkey#X, l_quantity#X] Condition : isnotnull(l_orderkey#X) -(90) Exchange +(95) Exchange Input [2]: [l_orderkey#X, l_quantity#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(91) Scan parquet +(96) Sort +Input [2]: [l_orderkey#X, l_quantity#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(97) Scan parquet Output [2]: [l_orderkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(92) HashAggregate +(98) HashAggregate Input [2]: [l_orderkey#X, l_quantity#X] Keys [1]: [l_orderkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_orderkey#X, sum#X, isEmpty#X] -(93) Exchange +(99) Exchange Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(94) HashAggregate +(100) HashAggregate Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Keys [1]: [l_orderkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [2]: [l_orderkey#X, sum(l_quantity#X)#X AS sum(l_quantity#X)#X] -(95) Filter +(101) Filter Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] Condition : (isnotnull(sum(l_quantity#X)#X) AND (sum(l_quantity#X)#X > 300.00)) -(96) Project +(102) Project Output [1]: [l_orderkey#X] Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] -(97) ShuffledHashJoin +(103) Sort +Input [1]: [l_orderkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(104) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(98) ShuffledHashJoin +(105) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(99) Project +(106) Project Output [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_quantity#X] Input [7]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_orderkey#X, l_quantity#X] -(100) HashAggregate +(107) HashAggregate Input [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_quantity#X] Keys [5]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [7]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] -(101) HashAggregate +(108) HashAggregate Input [7]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] Keys [5]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity#X)#X AS sum(l_quantity)#X] -(102) TakeOrderedAndProject +(109) TakeOrderedAndProject Input [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] Arguments: X, [o_totalprice#X DESC NULLS LAST, o_orderdate#X ASC NULLS FIRST], [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] -(103) AdaptiveSparkPlan +(110) AdaptiveSparkPlan Output [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/19.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/19.txt index d3d74c5ba792..ae0feb5dfd56 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/19.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/19.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (36) +AdaptiveSparkPlan (38) +- == Final Plan == VeloxColumnarToRowExec (24) +- ^ RegularHashAggregateExecTransformer (22) @@ -21,17 +21,19 @@ AdaptiveSparkPlan (36) +- ^ FilterExecTransformer (11) +- ^ Scan parquet (10) +- == Initial Plan == - HashAggregate (35) - +- HashAggregate (34) - +- Project (33) - +- ShuffledHashJoin Inner BuildRight (32) - :- Exchange (28) - : +- Project (27) - : +- Filter (26) - : +- Scan parquet (25) - +- Exchange (31) - +- Filter (30) - +- Scan parquet (29) + HashAggregate (37) + +- HashAggregate (36) + +- Project (35) + +- SortMergeJoin Inner (34) + :- Sort (29) + : +- Exchange (28) + : +- Project (27) + : +- Filter (26) + : +- Scan parquet (25) + +- Sort (33) + +- Exchange (32) + +- Filter (31) + +- Scan parquet (30) (1) Scan parquet @@ -157,44 +159,52 @@ Input [6]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, l_shipin Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(29) Scan parquet +(29) Sort +Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(30) Scan parquet Output [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_size), GreaterThanOrEqual(p_size,1), IsNotNull(p_partkey), Or(Or(And(And(EqualTo(p_brand,Brand#X),In(p_container, [SM BOX,SM CASE,SM PACK,SM PKG])),LessThanOrEqual(p_size,5)),And(And(EqualTo(p_brand,Brand#X),In(p_container, [MED BAG,MED BOX,MED PACK,MED PKG])),LessThanOrEqual(p_size,10))),And(And(EqualTo(p_brand,Brand#X),In(p_container, [LG BOX,LG CASE,LG PACK,LG PKG])),LessThanOrEqual(p_size,15)))] ReadSchema: struct -(30) Filter +(31) Filter Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Condition : (((isnotnull(p_size#X) AND (p_size#X >= 1)) AND isnotnull(p_partkey#X)) AND (((((p_brand#X = Brand#X) AND p_container#X IN (SM CASE,SM BOX,SM PACK,SM PKG)) AND (p_size#X <= 5)) OR (((p_brand#X = Brand#X) AND p_container#X IN (MED BAG,MED BOX,MED PKG,MED PACK)) AND (p_size#X <= 10))) OR (((p_brand#X = Brand#X) AND p_container#X IN (LG CASE,LG BOX,LG PACK,LG PKG)) AND (p_size#X <= 15)))) -(31) Exchange +(32) Exchange Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(32) ShuffledHashJoin +(33) Sort +Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(34) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join condition: (((((((p_brand#X = Brand#X) AND p_container#X IN (SM CASE,SM BOX,SM PACK,SM PKG)) AND (l_quantity#X >= 1.00)) AND (l_quantity#X <= 11.00)) AND (p_size#X <= 5)) OR (((((p_brand#X = Brand#X) AND p_container#X IN (MED BAG,MED BOX,MED PKG,MED PACK)) AND (l_quantity#X >= 10.00)) AND (l_quantity#X <= 20.00)) AND (p_size#X <= 10))) OR (((((p_brand#X = Brand#X) AND p_container#X IN (LG CASE,LG BOX,LG PACK,LG PKG)) AND (l_quantity#X >= 20.00)) AND (l_quantity#X <= 30.00)) AND (p_size#X <= 15))) -(33) Project +(35) Project Output [2]: [l_extendedprice#X, l_discount#X] Input [8]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, p_partkey#X, p_brand#X, p_size#X, p_container#X] -(34) HashAggregate +(36) HashAggregate Input [2]: [l_extendedprice#X, l_discount#X] Keys: [] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [2]: [sum#X, isEmpty#X] -(35) HashAggregate +(37) HashAggregate Input [2]: [sum#X, isEmpty#X] Keys: [] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X] Results [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X AS revenue#X] -(36) AdaptiveSparkPlan +(38) AdaptiveSparkPlan Output [1]: [revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/20.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/20.txt index df1ae98f903e..bab785551636 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/20.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/20.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (136) +AdaptiveSparkPlan (146) +- == Final Plan == VeloxColumnarToRowExec (96) +- ^ SortExecTransformer (94) @@ -76,45 +76,55 @@ AdaptiveSparkPlan (136) +- ^ FilterExecTransformer (78) +- ^ Scan parquet (77) +- == Initial Plan == - Sort (135) - +- Exchange (134) - +- Project (133) - +- ShuffledHashJoin Inner BuildRight (132) - :- Exchange (127) - : +- Project (126) - : +- ShuffledHashJoin LeftSemi BuildRight (125) - : :- Exchange (99) - : : +- Filter (98) - : : +- Scan parquet (97) - : +- Exchange (124) - : +- Project (123) - : +- ShuffledHashJoin Inner BuildLeft (122) - : :- Exchange (108) - : : +- ShuffledHashJoin LeftSemi BuildRight (107) - : : :- Exchange (102) - : : : +- Filter (101) - : : : +- Scan parquet (100) - : : +- Exchange (106) - : : +- Project (105) - : : +- Filter (104) - : : +- Scan parquet (103) - : +- Exchange (121) - : +- Filter (120) - : +- HashAggregate (119) - : +- HashAggregate (118) - : +- ShuffledHashJoin LeftSemi BuildRight (117) - : :- Exchange (112) - : : +- Project (111) - : : +- Filter (110) - : : +- Scan parquet (109) - : +- Exchange (116) - : +- Project (115) - : +- Filter (114) - : +- Scan parquet (113) - +- Exchange (131) - +- Project (130) - +- Filter (129) - +- Scan parquet (128) + Sort (145) + +- Exchange (144) + +- Project (143) + +- SortMergeJoin Inner (142) + :- Sort (136) + : +- Exchange (135) + : +- Project (134) + : +- SortMergeJoin LeftSemi (133) + : :- Sort (100) + : : +- Exchange (99) + : : +- Filter (98) + : : +- Scan parquet (97) + : +- Sort (132) + : +- Exchange (131) + : +- Project (130) + : +- SortMergeJoin Inner (129) + : :- Sort (112) + : : +- Exchange (111) + : : +- SortMergeJoin LeftSemi (110) + : : :- Sort (104) + : : : +- Exchange (103) + : : : +- Filter (102) + : : : +- Scan parquet (101) + : : +- Sort (109) + : : +- Exchange (108) + : : +- Project (107) + : : +- Filter (106) + : : +- Scan parquet (105) + : +- Sort (128) + : +- Exchange (127) + : +- Filter (126) + : +- HashAggregate (125) + : +- HashAggregate (124) + : +- SortMergeJoin LeftSemi (123) + : :- Sort (117) + : : +- Exchange (116) + : : +- Project (115) + : : +- Filter (114) + : : +- Scan parquet (113) + : +- Sort (122) + : +- Exchange (121) + : +- Project (120) + : +- Filter (119) + : +- Scan parquet (118) + +- Sort (141) + +- Exchange (140) + +- Project (139) + +- Filter (138) + +- Scan parquet (137) (1) Scan parquet @@ -518,176 +528,216 @@ Condition : isnotnull(s_nationkey#X) Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(100) Scan parquet +(100) Sort +Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(101) Scan parquet Output [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(ps_availqty), IsNotNull(ps_partkey), IsNotNull(ps_suppkey)] ReadSchema: struct -(101) Filter +(102) Filter Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Condition : ((isnotnull(ps_availqty#X) AND isnotnull(ps_partkey#X)) AND isnotnull(ps_suppkey#X)) -(102) Exchange +(103) Exchange Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Arguments: hashpartitioning(ps_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(103) Scan parquet +(104) Sort +Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] +Arguments: [ps_partkey#X ASC NULLS FIRST], false, 0 + +(105) Scan parquet Output [2]: [p_partkey#X, p_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_name), StringStartsWith(p_name,forest)] ReadSchema: struct -(104) Filter +(106) Filter Input [2]: [p_partkey#X, p_name#X] Condition : (isnotnull(p_name#X) AND StartsWith(p_name#X, forest)) -(105) Project +(107) Project Output [1]: [p_partkey#X] Input [2]: [p_partkey#X, p_name#X] -(106) Exchange +(108) Exchange Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(107) ShuffledHashJoin +(109) Sort +Input [1]: [p_partkey#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(110) SortMergeJoin Left keys [1]: [ps_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(108) Exchange +(111) Exchange Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Arguments: hashpartitioning(ps_partkey#X, ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(109) Scan parquet +(112) Sort +Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] +Arguments: [ps_partkey#X ASC NULLS FIRST, ps_suppkey#X ASC NULLS FIRST], false, 0 + +(113) Scan parquet Output [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1994-01-01), LessThan(l_shipdate,1995-01-01), IsNotNull(l_partkey), IsNotNull(l_suppkey)] ReadSchema: struct -(110) Filter +(114) Filter Input [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] Condition : ((((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1994-01-01)) AND (l_shipdate#X < 1995-01-01)) AND isnotnull(l_partkey#X)) AND isnotnull(l_suppkey#X)) -(111) Project +(115) Project Output [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Input [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] -(112) Exchange +(116) Exchange Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(113) Scan parquet +(117) Sort +Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(118) Scan parquet Output [2]: [p_partkey#X, p_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_name), StringStartsWith(p_name,forest)] ReadSchema: struct -(114) Filter +(119) Filter Input [2]: [p_partkey#X, p_name#X] Condition : (isnotnull(p_name#X) AND StartsWith(p_name#X, forest)) -(115) Project +(120) Project Output [1]: [p_partkey#X] Input [2]: [p_partkey#X, p_name#X] -(116) Exchange +(121) Exchange Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(117) ShuffledHashJoin +(122) Sort +Input [1]: [p_partkey#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(123) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(118) HashAggregate +(124) HashAggregate Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Keys [2]: [l_partkey#X, l_suppkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] -(119) HashAggregate +(125) HashAggregate Input [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] Keys [2]: [l_partkey#X, l_suppkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [3]: [CheckOverflow((0.50 * promote_precision(sum(l_quantity#X)#X)), DecimalType(24,3), true) AS (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(120) Filter +(126) Filter Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Condition : isnotnull((0.5 * sum(l_quantity))#X) -(121) Exchange +(127) Exchange Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Arguments: hashpartitioning(l_partkey#X, l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(122) ShuffledHashJoin +(128) Sort +Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] +Arguments: [l_partkey#X ASC NULLS FIRST, l_suppkey#X ASC NULLS FIRST], false, 0 + +(129) SortMergeJoin Left keys [2]: [ps_partkey#X, ps_suppkey#X] Right keys [2]: [l_partkey#X, l_suppkey#X] Join condition: (cast(cast(ps_availqty#X as decimal(10,0)) as decimal(24,3)) > (0.5 * sum(l_quantity))#X) -(123) Project +(130) Project Output [1]: [ps_suppkey#X] Input [6]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(124) Exchange +(131) Exchange Input [1]: [ps_suppkey#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(125) ShuffledHashJoin +(132) Sort +Input [1]: [ps_suppkey#X] +Arguments: [ps_suppkey#X ASC NULLS FIRST], false, 0 + +(133) SortMergeJoin Left keys [1]: [s_suppkey#X] Right keys [1]: [ps_suppkey#X] Join condition: None -(126) Project +(134) Project Output [3]: [s_name#X, s_address#X, s_nationkey#X] Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] -(127) Exchange +(135) Exchange Input [3]: [s_name#X, s_address#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(128) Scan parquet +(136) Sort +Input [3]: [s_name#X, s_address#X, s_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(137) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,CANADA), IsNotNull(n_nationkey)] ReadSchema: struct -(129) Filter +(138) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = CANADA)) AND isnotnull(n_nationkey#X)) -(130) Project +(139) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(131) Exchange +(140) Exchange Input [1]: [n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(132) ShuffledHashJoin +(141) Sort +Input [1]: [n_nationkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(142) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(133) Project +(143) Project Output [2]: [s_name#X, s_address#X] Input [4]: [s_name#X, s_address#X, s_nationkey#X, n_nationkey#X] -(134) Exchange +(144) Exchange Input [2]: [s_name#X, s_address#X] Arguments: rangepartitioning(s_name#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(135) Sort +(145) Sort Input [2]: [s_name#X, s_address#X] Arguments: [s_name#X ASC NULLS FIRST], true, 0 -(136) AdaptiveSparkPlan +(146) AdaptiveSparkPlan Output [2]: [s_name#X, s_address#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/21.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/21.txt index c0f3602f9fe6..ef4e87bb1de4 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/21.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/21.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (129) +AdaptiveSparkPlan (138) +- == Final Plan == VeloxColumnarToRowExec (92) +- TakeOrderedAndProjectExecTransformer (91) @@ -73,42 +73,51 @@ AdaptiveSparkPlan (129) +- ^ FilterExecTransformer (71) +- ^ Scan parquet (70) +- == Initial Plan == - TakeOrderedAndProject (128) - +- HashAggregate (127) - +- Exchange (126) - +- HashAggregate (125) - +- Project (124) - +- ShuffledHashJoin Inner BuildRight (123) - :- Exchange (118) - : +- Project (117) - : +- ShuffledHashJoin Inner BuildRight (116) - : :- Exchange (111) - : : +- Project (110) - : : +- ShuffledHashJoin Inner BuildLeft (109) - : : :- Exchange (95) - : : : +- Filter (94) - : : : +- Scan parquet (93) - : : +- Exchange (108) - : : +- ShuffledHashJoin LeftAnti BuildRight (107) - : : :- ShuffledHashJoin LeftSemi BuildRight (102) - : : : :- Exchange (99) - : : : : +- Project (98) - : : : : +- Filter (97) - : : : : +- Scan parquet (96) - : : : +- Exchange (101) - : : : +- Scan parquet (100) - : : +- Exchange (106) - : : +- Project (105) - : : +- Filter (104) - : : +- Scan parquet (103) - : +- Exchange (115) - : +- Project (114) - : +- Filter (113) - : +- Scan parquet (112) - +- Exchange (122) - +- Project (121) - +- Filter (120) - +- Scan parquet (119) + TakeOrderedAndProject (137) + +- HashAggregate (136) + +- Exchange (135) + +- HashAggregate (134) + +- Project (133) + +- SortMergeJoin Inner (132) + :- Sort (126) + : +- Exchange (125) + : +- Project (124) + : +- SortMergeJoin Inner (123) + : :- Sort (117) + : : +- Exchange (116) + : : +- Project (115) + : : +- SortMergeJoin Inner (114) + : : :- Sort (96) + : : : +- Exchange (95) + : : : +- Filter (94) + : : : +- Scan parquet (93) + : : +- Sort (113) + : : +- Exchange (112) + : : +- SortMergeJoin LeftAnti (111) + : : :- SortMergeJoin LeftSemi (105) + : : : :- Sort (101) + : : : : +- Exchange (100) + : : : : +- Project (99) + : : : : +- Filter (98) + : : : : +- Scan parquet (97) + : : : +- Sort (104) + : : : +- Exchange (103) + : : : +- Scan parquet (102) + : : +- Sort (110) + : : +- Exchange (109) + : : +- Project (108) + : : +- Filter (107) + : : +- Scan parquet (106) + : +- Sort (122) + : +- Exchange (121) + : +- Project (120) + : +- Filter (119) + : +- Scan parquet (118) + +- Sort (131) + +- Exchange (130) + +- Project (129) + +- Filter (128) + +- Scan parquet (127) (1) Scan parquet @@ -501,163 +510,199 @@ Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) Input [3]: [s_suppkey#X, s_name#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(96) Scan parquet +(96) Sort +Input [3]: [s_suppkey#X, s_name#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(97) Scan parquet Output [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_receiptdate), IsNotNull(l_commitdate), IsNotNull(l_suppkey), IsNotNull(l_orderkey)] ReadSchema: struct -(97) Filter +(98) Filter Input [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] Condition : ((((isnotnull(l_receiptdate#X) AND isnotnull(l_commitdate#X)) AND (l_receiptdate#X > l_commitdate#X)) AND isnotnull(l_suppkey#X)) AND isnotnull(l_orderkey#X)) -(98) Project +(99) Project Output [2]: [l_orderkey#X, l_suppkey#X] Input [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] -(99) Exchange +(100) Exchange Input [2]: [l_orderkey#X, l_suppkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(100) Scan parquet +(101) Sort +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(102) Scan parquet Output [2]: [l_orderkey#X, l_suppkey#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(101) Exchange +(103) Exchange Input [2]: [l_orderkey#X, l_suppkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(102) ShuffledHashJoin +(104) Sort +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(105) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: NOT (l_suppkey#X = l_suppkey#X) -(103) Scan parquet +(106) Scan parquet Output [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_receiptdate), IsNotNull(l_commitdate)] ReadSchema: struct -(104) Filter +(107) Filter Input [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] Condition : ((isnotnull(l_receiptdate#X) AND isnotnull(l_commitdate#X)) AND (l_receiptdate#X > l_commitdate#X)) -(105) Project +(108) Project Output [2]: [l_orderkey#X, l_suppkey#X] Input [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] -(106) Exchange +(109) Exchange Input [2]: [l_orderkey#X, l_suppkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(107) ShuffledHashJoin +(110) Sort +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(111) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: NOT (l_suppkey#X = l_suppkey#X) -(108) Exchange +(112) Exchange Input [2]: [l_orderkey#X, l_suppkey#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(109) ShuffledHashJoin +(113) Sort +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: [l_suppkey#X ASC NULLS FIRST], false, 0 + +(114) SortMergeJoin Left keys [1]: [s_suppkey#X] Right keys [1]: [l_suppkey#X] Join condition: None -(110) Project +(115) Project Output [3]: [s_name#X, s_nationkey#X, l_orderkey#X] Input [5]: [s_suppkey#X, s_name#X, s_nationkey#X, l_orderkey#X, l_suppkey#X] -(111) Exchange +(116) Exchange Input [3]: [s_name#X, s_nationkey#X, l_orderkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(112) Scan parquet +(117) Sort +Input [3]: [s_name#X, s_nationkey#X, l_orderkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(118) Scan parquet Output [2]: [o_orderkey#X, o_orderstatus#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderstatus), EqualTo(o_orderstatus,F), IsNotNull(o_orderkey)] ReadSchema: struct -(113) Filter +(119) Filter Input [2]: [o_orderkey#X, o_orderstatus#X] Condition : ((isnotnull(o_orderstatus#X) AND (o_orderstatus#X = F)) AND isnotnull(o_orderkey#X)) -(114) Project +(120) Project Output [1]: [o_orderkey#X] Input [2]: [o_orderkey#X, o_orderstatus#X] -(115) Exchange +(121) Exchange Input [1]: [o_orderkey#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(116) ShuffledHashJoin +(122) Sort +Input [1]: [o_orderkey#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(123) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [o_orderkey#X] Join condition: None -(117) Project +(124) Project Output [2]: [s_name#X, s_nationkey#X] Input [4]: [s_name#X, s_nationkey#X, l_orderkey#X, o_orderkey#X] -(118) Exchange +(125) Exchange Input [2]: [s_name#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(119) Scan parquet +(126) Sort +Input [2]: [s_name#X, s_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(127) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,SAUDI ARABIA), IsNotNull(n_nationkey)] ReadSchema: struct -(120) Filter +(128) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = SAUDI ARABIA)) AND isnotnull(n_nationkey#X)) -(121) Project +(129) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(122) Exchange +(130) Exchange Input [1]: [n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(123) ShuffledHashJoin +(131) Sort +Input [1]: [n_nationkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(132) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(124) Project +(133) Project Output [1]: [s_name#X] Input [3]: [s_name#X, s_nationkey#X, n_nationkey#X] -(125) HashAggregate +(134) HashAggregate Input [1]: [s_name#X] Keys [1]: [s_name#X] Functions [1]: [partial_count(1)] Aggregate Attributes [1]: [count#X] Results [2]: [s_name#X, count#X] -(126) Exchange +(135) Exchange Input [2]: [s_name#X, count#X] Arguments: hashpartitioning(s_name#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(127) HashAggregate +(136) HashAggregate Input [2]: [s_name#X, count#X] Keys [1]: [s_name#X] Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#X] Results [2]: [s_name#X, count(1)#X AS numwait#X] -(128) TakeOrderedAndProject +(137) TakeOrderedAndProject Input [2]: [s_name#X, numwait#X] Arguments: X, [numwait#X DESC NULLS LAST, s_name#X ASC NULLS FIRST], [s_name#X, numwait#X] -(129) AdaptiveSparkPlan +(138) AdaptiveSparkPlan Output [2]: [s_name#X, numwait#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/22.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/22.txt index 5ab0811e658f..fcf712a9d5fd 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/22.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/22.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (50) +AdaptiveSparkPlan (52) +- == Final Plan == VeloxColumnarToRowExec (37) +- ^ SortExecTransformer (35) @@ -30,18 +30,20 @@ AdaptiveSparkPlan (50) +- ^ ProjectExecTransformer (11) +- ^ Scan parquet (10) +- == Initial Plan == - Sort (49) - +- Exchange (48) - +- HashAggregate (47) - +- Exchange (46) - +- HashAggregate (45) - +- Project (44) - +- ShuffledHashJoin LeftAnti BuildRight (43) - :- Exchange (40) - : +- Filter (39) - : +- Scan parquet (38) - +- Exchange (42) - +- Scan parquet (41) + Sort (51) + +- Exchange (50) + +- HashAggregate (49) + +- Exchange (48) + +- HashAggregate (47) + +- Project (46) + +- SortMergeJoin LeftAnti (45) + :- Sort (41) + : +- Exchange (40) + : +- Filter (39) + : +- Scan parquet (38) + +- Sort (44) + +- Exchange (43) + +- Scan parquet (42) (1) Scan parquet @@ -210,51 +212,59 @@ Condition : ((isnotnull(c_acctbal#X) AND substring(c_phone#X, 1, 2) IN (13,31,23 Input [3]: [c_custkey#X, c_phone#X, c_acctbal#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(41) Scan parquet +(41) Sort +Input [3]: [c_custkey#X, c_phone#X, c_acctbal#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(42) Scan parquet Output [1]: [o_custkey#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(42) Exchange +(43) Exchange Input [1]: [o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(43) ShuffledHashJoin +(44) Sort +Input [1]: [o_custkey#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(45) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join condition: None -(44) Project +(46) Project Output [2]: [substring(c_phone#X, 1, 2) AS cntrycode#X, c_acctbal#X] Input [3]: [c_custkey#X, c_phone#X, c_acctbal#X] -(45) HashAggregate +(47) HashAggregate Input [2]: [cntrycode#X, c_acctbal#X] Keys [1]: [cntrycode#X] Functions [2]: [partial_count(1), partial_sum(c_acctbal#X)] Aggregate Attributes [3]: [count#X, sum#X, isEmpty#X] Results [4]: [cntrycode#X, count#X, sum#X, isEmpty#X] -(46) Exchange +(48) Exchange Input [4]: [cntrycode#X, count#X, sum#X, isEmpty#X] Arguments: hashpartitioning(cntrycode#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(47) HashAggregate +(49) HashAggregate Input [4]: [cntrycode#X, count#X, sum#X, isEmpty#X] Keys [1]: [cntrycode#X] Functions [2]: [count(1), sum(c_acctbal#X)] Aggregate Attributes [2]: [count(1)#X, sum(c_acctbal#X)#X] Results [3]: [cntrycode#X, count(1)#X AS numcust#X, sum(c_acctbal#X)#X AS totacctbal#X] -(48) Exchange +(50) Exchange Input [3]: [cntrycode#X, numcust#X, totacctbal#X] Arguments: rangepartitioning(cntrycode#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(49) Sort +(51) Sort Input [3]: [cntrycode#X, numcust#X, totacctbal#X] Arguments: [cntrycode#X ASC NULLS FIRST], true, 0 -(50) AdaptiveSparkPlan +(52) AdaptiveSparkPlan Output [3]: [cntrycode#X, numcust#X, totacctbal#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/3.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/3.txt index c51701bd0840..607d6444f432 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/3.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/3.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (63) +AdaptiveSparkPlan (67) +- == Final Plan == VeloxColumnarToRowExec (43) +- TakeOrderedAndProjectExecTransformer (42) @@ -36,25 +36,29 @@ AdaptiveSparkPlan (63) +- ^ FilterExecTransformer (28) +- ^ Scan parquet (27) +- == Initial Plan == - TakeOrderedAndProject (62) - +- HashAggregate (61) - +- HashAggregate (60) - +- Project (59) - +- ShuffledHashJoin Inner BuildRight (58) - :- Exchange (53) - : +- Project (52) - : +- ShuffledHashJoin Inner BuildLeft (51) - : :- Exchange (47) - : : +- Project (46) - : : +- Filter (45) - : : +- Scan parquet (44) - : +- Exchange (50) - : +- Filter (49) - : +- Scan parquet (48) - +- Exchange (57) - +- Project (56) - +- Filter (55) - +- Scan parquet (54) + TakeOrderedAndProject (66) + +- HashAggregate (65) + +- HashAggregate (64) + +- Project (63) + +- SortMergeJoin Inner (62) + :- Sort (56) + : +- Exchange (55) + : +- Project (54) + : +- SortMergeJoin Inner (53) + : :- Sort (48) + : : +- Exchange (47) + : : +- Project (46) + : : +- Filter (45) + : : +- Scan parquet (44) + : +- Sort (52) + : +- Exchange (51) + : +- Filter (50) + : +- Scan parquet (49) + +- Sort (61) + +- Exchange (60) + +- Project (59) + +- Filter (58) + +- Scan parquet (57) (1) Scan parquet @@ -256,80 +260,96 @@ Input [2]: [c_custkey#X, c_mktsegment#X] Input [1]: [c_custkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(48) Scan parquet +(48) Sort +Input [1]: [c_custkey#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(49) Scan parquet Output [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderdate), LessThan(o_orderdate,1995-03-15), IsNotNull(o_custkey), IsNotNull(o_orderkey)] ReadSchema: struct -(49) Filter +(50) Filter Input [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Condition : (((isnotnull(o_orderdate#X) AND (o_orderdate#X < 1995-03-15)) AND isnotnull(o_custkey#X)) AND isnotnull(o_orderkey#X)) -(50) Exchange +(51) Exchange Input [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(51) ShuffledHashJoin +(52) Sort +Input [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(53) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join condition: None -(52) Project +(54) Project Output [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] Input [5]: [c_custkey#X, o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] -(53) Exchange +(55) Exchange Input [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(54) Scan parquet +(56) Sort +Input [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(57) Scan parquet Output [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThan(l_shipdate,1995-03-15), IsNotNull(l_orderkey)] ReadSchema: struct -(55) Filter +(58) Filter Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : ((isnotnull(l_shipdate#X) AND (l_shipdate#X > 1995-03-15)) AND isnotnull(l_orderkey#X)) -(56) Project +(59) Project Output [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(57) Exchange +(60) Exchange Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(58) ShuffledHashJoin +(61) Sort +Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(62) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(59) Project +(63) Project Output [5]: [o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] Input [6]: [o_orderkey#X, o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] -(60) HashAggregate +(64) HashAggregate Input [5]: [o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] Keys [3]: [l_orderkey#X, o_orderdate#X, o_shippriority#X] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [5]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] -(61) HashAggregate +(65) HashAggregate Input [5]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] Keys [3]: [l_orderkey#X, o_orderdate#X, o_shippriority#X] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X] Results [4]: [l_orderkey#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X AS revenue#X, o_orderdate#X, o_shippriority#X] -(62) TakeOrderedAndProject +(66) TakeOrderedAndProject Input [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] Arguments: X, [revenue#X DESC NULLS LAST, o_orderdate#X ASC NULLS FIRST], [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] -(63) AdaptiveSparkPlan +(67) AdaptiveSparkPlan Output [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/4.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/4.txt index 1b95ae3dbf39..cc6b8f351600 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/4.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/4.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (54) +AdaptiveSparkPlan (56) +- == Final Plan == VeloxColumnarToRowExec (38) +- ^ SortExecTransformer (36) @@ -31,21 +31,23 @@ AdaptiveSparkPlan (54) +- ^ FilterExecTransformer (11) +- ^ Scan parquet (10) +- == Initial Plan == - Sort (53) - +- Exchange (52) - +- HashAggregate (51) - +- Exchange (50) - +- HashAggregate (49) - +- Project (48) - +- ShuffledHashJoin LeftSemi BuildRight (47) - :- Exchange (42) - : +- Project (41) - : +- Filter (40) - : +- Scan parquet (39) - +- Exchange (46) - +- Project (45) - +- Filter (44) - +- Scan parquet (43) + Sort (55) + +- Exchange (54) + +- HashAggregate (53) + +- Exchange (52) + +- HashAggregate (51) + +- Project (50) + +- SortMergeJoin LeftSemi (49) + :- Sort (43) + : +- Exchange (42) + : +- Project (41) + : +- Filter (40) + : +- Scan parquet (39) + +- Sort (48) + +- Exchange (47) + +- Project (46) + +- Filter (45) + +- Scan parquet (44) (1) Scan parquet @@ -223,60 +225,68 @@ Input [3]: [o_orderkey#X, o_orderdate#X, o_orderpriority#X] Input [2]: [o_orderkey#X, o_orderpriority#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(43) Scan parquet +(43) Sort +Input [2]: [o_orderkey#X, o_orderpriority#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(44) Scan parquet Output [3]: [l_orderkey#X, l_commitdate#X, l_receiptdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_commitdate), IsNotNull(l_receiptdate)] ReadSchema: struct -(44) Filter +(45) Filter Input [3]: [l_orderkey#X, l_commitdate#X, l_receiptdate#X] Condition : ((isnotnull(l_commitdate#X) AND isnotnull(l_receiptdate#X)) AND (l_commitdate#X < l_receiptdate#X)) -(45) Project +(46) Project Output [1]: [l_orderkey#X] Input [3]: [l_orderkey#X, l_commitdate#X, l_receiptdate#X] -(46) Exchange +(47) Exchange Input [1]: [l_orderkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(47) ShuffledHashJoin +(48) Sort +Input [1]: [l_orderkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(49) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(48) Project +(50) Project Output [1]: [o_orderpriority#X] Input [2]: [o_orderkey#X, o_orderpriority#X] -(49) HashAggregate +(51) HashAggregate Input [1]: [o_orderpriority#X] Keys [1]: [o_orderpriority#X] Functions [1]: [partial_count(1)] Aggregate Attributes [1]: [count#X] Results [2]: [o_orderpriority#X, count#X] -(50) Exchange +(52) Exchange Input [2]: [o_orderpriority#X, count#X] Arguments: hashpartitioning(o_orderpriority#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(51) HashAggregate +(53) HashAggregate Input [2]: [o_orderpriority#X, count#X] Keys [1]: [o_orderpriority#X] Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#X] Results [2]: [o_orderpriority#X, count(1)#X AS order_count#X] -(52) Exchange +(54) Exchange Input [2]: [o_orderpriority#X, order_count#X] Arguments: rangepartitioning(o_orderpriority#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(53) Sort +(55) Sort Input [2]: [o_orderpriority#X, order_count#X] Arguments: [o_orderpriority#X ASC NULLS FIRST], true, 0 -(54) AdaptiveSparkPlan +(56) AdaptiveSparkPlan Output [2]: [o_orderpriority#X, order_count#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/5.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/5.txt index c31fbccc1e59..a1f95887aae3 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/5.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/5.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (146) +AdaptiveSparkPlan (156) +- == Final Plan == VeloxColumnarToRowExec (106) +- ^ SortExecTransformer (104) @@ -83,45 +83,55 @@ AdaptiveSparkPlan (146) +- ^ FilterExecTransformer (79) +- ^ Scan parquet (78) +- == Initial Plan == - Sort (145) - +- Exchange (144) - +- HashAggregate (143) - +- Exchange (142) - +- HashAggregate (141) - +- Project (140) - +- ShuffledHashJoin Inner BuildRight (139) - :- Exchange (134) - : +- Project (133) - : +- ShuffledHashJoin Inner BuildRight (132) - : :- Exchange (128) - : : +- Project (127) - : : +- ShuffledHashJoin Inner BuildRight (126) - : : :- Exchange (122) - : : : +- Project (121) - : : : +- ShuffledHashJoin Inner BuildRight (120) - : : : :- Exchange (116) - : : : : +- Project (115) - : : : : +- ShuffledHashJoin Inner BuildLeft (114) - : : : : :- Exchange (109) - : : : : : +- Filter (108) - : : : : : +- Scan parquet (107) - : : : : +- Exchange (113) - : : : : +- Project (112) - : : : : +- Filter (111) - : : : : +- Scan parquet (110) - : : : +- Exchange (119) - : : : +- Filter (118) - : : : +- Scan parquet (117) - : : +- Exchange (125) - : : +- Filter (124) - : : +- Scan parquet (123) - : +- Exchange (131) - : +- Filter (130) - : +- Scan parquet (129) - +- Exchange (138) - +- Project (137) - +- Filter (136) - +- Scan parquet (135) + Sort (155) + +- Exchange (154) + +- HashAggregate (153) + +- Exchange (152) + +- HashAggregate (151) + +- Project (150) + +- SortMergeJoin Inner (149) + :- Sort (143) + : +- Exchange (142) + : +- Project (141) + : +- SortMergeJoin Inner (140) + : :- Sort (135) + : : +- Exchange (134) + : : +- Project (133) + : : +- SortMergeJoin Inner (132) + : : :- Sort (127) + : : : +- Exchange (126) + : : : +- Project (125) + : : : +- SortMergeJoin Inner (124) + : : : :- Sort (119) + : : : : +- Exchange (118) + : : : : +- Project (117) + : : : : +- SortMergeJoin Inner (116) + : : : : :- Sort (110) + : : : : : +- Exchange (109) + : : : : : +- Filter (108) + : : : : : +- Scan parquet (107) + : : : : +- Sort (115) + : : : : +- Exchange (114) + : : : : +- Project (113) + : : : : +- Filter (112) + : : : : +- Scan parquet (111) + : : : +- Sort (123) + : : : +- Exchange (122) + : : : +- Filter (121) + : : : +- Scan parquet (120) + : : +- Sort (131) + : : +- Exchange (130) + : : +- Filter (129) + : : +- Scan parquet (128) + : +- Sort (139) + : +- Exchange (138) + : +- Filter (137) + : +- Scan parquet (136) + +- Sort (148) + +- Exchange (147) + +- Project (146) + +- Filter (145) + +- Scan parquet (144) (1) Scan parquet @@ -567,176 +577,216 @@ Condition : (isnotnull(c_custkey#X) AND isnotnull(c_nationkey#X)) Input [2]: [c_custkey#X, c_nationkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(110) Scan parquet +(110) Sort +Input [2]: [c_custkey#X, c_nationkey#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(111) Scan parquet Output [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderdate), GreaterThanOrEqual(o_orderdate,1994-01-01), LessThan(o_orderdate,1995-01-01), IsNotNull(o_custkey), IsNotNull(o_orderkey)] ReadSchema: struct -(111) Filter +(112) Filter Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Condition : ((((isnotnull(o_orderdate#X) AND (o_orderdate#X >= 1994-01-01)) AND (o_orderdate#X < 1995-01-01)) AND isnotnull(o_custkey#X)) AND isnotnull(o_orderkey#X)) -(112) Project +(113) Project Output [2]: [o_orderkey#X, o_custkey#X] Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] -(113) Exchange +(114) Exchange Input [2]: [o_orderkey#X, o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(114) ShuffledHashJoin +(115) Sort +Input [2]: [o_orderkey#X, o_custkey#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(116) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join condition: None -(115) Project +(117) Project Output [2]: [c_nationkey#X, o_orderkey#X] Input [4]: [c_custkey#X, c_nationkey#X, o_orderkey#X, o_custkey#X] -(116) Exchange +(118) Exchange Input [2]: [c_nationkey#X, o_orderkey#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(117) Scan parquet +(119) Sort +Input [2]: [c_nationkey#X, o_orderkey#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(120) Scan parquet Output [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_orderkey), IsNotNull(l_suppkey)] ReadSchema: struct -(118) Filter +(121) Filter Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Condition : (isnotnull(l_orderkey#X) AND isnotnull(l_suppkey#X)) -(119) Exchange +(122) Exchange Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(120) ShuffledHashJoin +(123) Sort +Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(124) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(121) Project +(125) Project Output [4]: [c_nationkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Input [6]: [c_nationkey#X, o_orderkey#X, l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] -(122) Exchange +(126) Exchange Input [4]: [c_nationkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_suppkey#X, c_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(123) Scan parquet +(127) Sort +Input [4]: [c_nationkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_suppkey#X ASC NULLS FIRST, c_nationkey#X ASC NULLS FIRST], false, 0 + +(128) Scan parquet Output [2]: [s_suppkey#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct -(124) Filter +(129) Filter Input [2]: [s_suppkey#X, s_nationkey#X] Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) -(125) Exchange +(130) Exchange Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(126) ShuffledHashJoin +(131) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST, s_nationkey#X ASC NULLS FIRST], false, 0 + +(132) SortMergeJoin Left keys [2]: [l_suppkey#X, c_nationkey#X] Right keys [2]: [s_suppkey#X, s_nationkey#X] Join condition: None -(127) Project +(133) Project Output [3]: [l_extendedprice#X, l_discount#X, s_nationkey#X] Input [6]: [c_nationkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, s_suppkey#X, s_nationkey#X] -(128) Exchange +(134) Exchange Input [3]: [l_extendedprice#X, l_discount#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(129) Scan parquet +(135) Sort +Input [3]: [l_extendedprice#X, l_discount#X, s_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(136) Scan parquet Output [3]: [n_nationkey#X, n_name#X, n_regionkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey), IsNotNull(n_regionkey)] ReadSchema: struct -(130) Filter +(137) Filter Input [3]: [n_nationkey#X, n_name#X, n_regionkey#X] Condition : (isnotnull(n_nationkey#X) AND isnotnull(n_regionkey#X)) -(131) Exchange +(138) Exchange Input [3]: [n_nationkey#X, n_name#X, n_regionkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(132) ShuffledHashJoin +(139) Sort +Input [3]: [n_nationkey#X, n_name#X, n_regionkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(140) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(133) Project +(141) Project Output [4]: [l_extendedprice#X, l_discount#X, n_name#X, n_regionkey#X] Input [6]: [l_extendedprice#X, l_discount#X, s_nationkey#X, n_nationkey#X, n_name#X, n_regionkey#X] -(134) Exchange +(142) Exchange Input [4]: [l_extendedprice#X, l_discount#X, n_name#X, n_regionkey#X] Arguments: hashpartitioning(n_regionkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(135) Scan parquet +(143) Sort +Input [4]: [l_extendedprice#X, l_discount#X, n_name#X, n_regionkey#X] +Arguments: [n_regionkey#X ASC NULLS FIRST], false, 0 + +(144) Scan parquet Output [2]: [r_regionkey#X, r_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(r_name), EqualTo(r_name,ASIA), IsNotNull(r_regionkey)] ReadSchema: struct -(136) Filter +(145) Filter Input [2]: [r_regionkey#X, r_name#X] Condition : ((isnotnull(r_name#X) AND (r_name#X = ASIA)) AND isnotnull(r_regionkey#X)) -(137) Project +(146) Project Output [1]: [r_regionkey#X] Input [2]: [r_regionkey#X, r_name#X] -(138) Exchange +(147) Exchange Input [1]: [r_regionkey#X] Arguments: hashpartitioning(r_regionkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(139) ShuffledHashJoin +(148) Sort +Input [1]: [r_regionkey#X] +Arguments: [r_regionkey#X ASC NULLS FIRST], false, 0 + +(149) SortMergeJoin Left keys [1]: [n_regionkey#X] Right keys [1]: [r_regionkey#X] Join condition: None -(140) Project +(150) Project Output [3]: [l_extendedprice#X, l_discount#X, n_name#X] Input [5]: [l_extendedprice#X, l_discount#X, n_name#X, n_regionkey#X, r_regionkey#X] -(141) HashAggregate +(151) HashAggregate Input [3]: [l_extendedprice#X, l_discount#X, n_name#X] Keys [1]: [n_name#X] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [n_name#X, sum#X, isEmpty#X] -(142) Exchange +(152) Exchange Input [3]: [n_name#X, sum#X, isEmpty#X] Arguments: hashpartitioning(n_name#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(143) HashAggregate +(153) HashAggregate Input [3]: [n_name#X, sum#X, isEmpty#X] Keys [1]: [n_name#X] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X] Results [2]: [n_name#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true))#X AS revenue#X] -(144) Exchange +(154) Exchange Input [2]: [n_name#X, revenue#X] Arguments: rangepartitioning(revenue#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(145) Sort +(155) Sort Input [2]: [n_name#X, revenue#X] Arguments: [revenue#X DESC NULLS LAST], true, 0 -(146) AdaptiveSparkPlan +(156) AdaptiveSparkPlan Output [2]: [n_name#X, revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/7.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/7.txt index 06b84fdca2c7..64d51413a084 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/7.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/7.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (139) +AdaptiveSparkPlan (149) +- == Final Plan == VeloxColumnarToRowExec (101) +- ^ SortExecTransformer (99) @@ -79,43 +79,53 @@ AdaptiveSparkPlan (139) +- ShuffleQueryStage (79) +- ReusedExchange (78) +- == Initial Plan == - Sort (138) - +- Exchange (137) - +- HashAggregate (136) - +- Exchange (135) - +- HashAggregate (134) - +- Project (133) - +- ShuffledHashJoin Inner BuildRight (132) - :- Exchange (128) - : +- Project (127) - : +- ShuffledHashJoin Inner BuildRight (126) - : :- Exchange (122) - : : +- Project (121) - : : +- ShuffledHashJoin Inner BuildRight (120) - : : :- Exchange (116) - : : : +- Project (115) - : : : +- ShuffledHashJoin Inner BuildRight (114) - : : : :- Exchange (110) - : : : : +- Project (109) - : : : : +- ShuffledHashJoin Inner BuildLeft (108) - : : : : :- Exchange (104) - : : : : : +- Filter (103) - : : : : : +- Scan parquet (102) - : : : : +- Exchange (107) - : : : : +- Filter (106) - : : : : +- Scan parquet (105) - : : : +- Exchange (113) - : : : +- Filter (112) - : : : +- Scan parquet (111) - : : +- Exchange (119) - : : +- Filter (118) - : : +- Scan parquet (117) - : +- Exchange (125) - : +- Filter (124) - : +- Scan parquet (123) - +- Exchange (131) - +- Filter (130) - +- Scan parquet (129) + Sort (148) + +- Exchange (147) + +- HashAggregate (146) + +- Exchange (145) + +- HashAggregate (144) + +- Project (143) + +- SortMergeJoin Inner (142) + :- Sort (137) + : +- Exchange (136) + : +- Project (135) + : +- SortMergeJoin Inner (134) + : :- Sort (129) + : : +- Exchange (128) + : : +- Project (127) + : : +- SortMergeJoin Inner (126) + : : :- Sort (121) + : : : +- Exchange (120) + : : : +- Project (119) + : : : +- SortMergeJoin Inner (118) + : : : :- Sort (113) + : : : : +- Exchange (112) + : : : : +- Project (111) + : : : : +- SortMergeJoin Inner (110) + : : : : :- Sort (105) + : : : : : +- Exchange (104) + : : : : : +- Filter (103) + : : : : : +- Scan parquet (102) + : : : : +- Sort (109) + : : : : +- Exchange (108) + : : : : +- Filter (107) + : : : : +- Scan parquet (106) + : : : +- Sort (117) + : : : +- Exchange (116) + : : : +- Filter (115) + : : : +- Scan parquet (114) + : : +- Sort (125) + : : +- Exchange (124) + : : +- Filter (123) + : : +- Scan parquet (122) + : +- Sort (133) + : +- Exchange (132) + : +- Filter (131) + : +- Scan parquet (130) + +- Sort (141) + +- Exchange (140) + +- Filter (139) + +- Scan parquet (138) (1) Scan parquet @@ -537,168 +547,208 @@ Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(105) Scan parquet +(105) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(106) Scan parquet Output [5]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1995-01-01), LessThanOrEqual(l_shipdate,1996-12-31), IsNotNull(l_suppkey), IsNotNull(l_orderkey)] ReadSchema: struct -(106) Filter +(107) Filter Input [5]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : ((((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1995-01-01)) AND (l_shipdate#X <= 1996-12-31)) AND isnotnull(l_suppkey#X)) AND isnotnull(l_orderkey#X)) -(107) Exchange +(108) Exchange Input [5]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(108) ShuffledHashJoin +(109) Sort +Input [5]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Arguments: [l_suppkey#X ASC NULLS FIRST], false, 0 + +(110) SortMergeJoin Left keys [1]: [s_suppkey#X] Right keys [1]: [l_suppkey#X] Join condition: None -(109) Project +(111) Project Output [5]: [s_nationkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Input [7]: [s_suppkey#X, s_nationkey#X, l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(110) Exchange +(112) Exchange Input [5]: [s_nationkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(111) Scan parquet +(113) Sort +Input [5]: [s_nationkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(114) Scan parquet Output [2]: [o_orderkey#X, o_custkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderkey), IsNotNull(o_custkey)] ReadSchema: struct -(112) Filter +(115) Filter Input [2]: [o_orderkey#X, o_custkey#X] Condition : (isnotnull(o_orderkey#X) AND isnotnull(o_custkey#X)) -(113) Exchange +(116) Exchange Input [2]: [o_orderkey#X, o_custkey#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(114) ShuffledHashJoin +(117) Sort +Input [2]: [o_orderkey#X, o_custkey#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(118) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [o_orderkey#X] Join condition: None -(115) Project +(119) Project Output [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_custkey#X] Input [7]: [s_nationkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_orderkey#X, o_custkey#X] -(116) Exchange +(120) Exchange Input [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(117) Scan parquet +(121) Sort +Input [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_custkey#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(122) Scan parquet Output [2]: [c_custkey#X, c_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(c_custkey), IsNotNull(c_nationkey)] ReadSchema: struct -(118) Filter +(123) Filter Input [2]: [c_custkey#X, c_nationkey#X] Condition : (isnotnull(c_custkey#X) AND isnotnull(c_nationkey#X)) -(119) Exchange +(124) Exchange Input [2]: [c_custkey#X, c_nationkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(120) ShuffledHashJoin +(125) Sort +Input [2]: [c_custkey#X, c_nationkey#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(126) SortMergeJoin Left keys [1]: [o_custkey#X] Right keys [1]: [c_custkey#X] Join condition: None -(121) Project +(127) Project Output [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X] Input [7]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_custkey#X, c_custkey#X, c_nationkey#X] -(122) Exchange +(128) Exchange Input [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(123) Scan parquet +(129) Sort +Input [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(130) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey), Or(EqualTo(n_name,FRANCE),EqualTo(n_name,GERMANY))] ReadSchema: struct -(124) Filter +(131) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : (isnotnull(n_nationkey#X) AND ((n_name#X = FRANCE) OR (n_name#X = GERMANY))) -(125) Exchange +(132) Exchange Input [2]: [n_nationkey#X, n_name#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(126) ShuffledHashJoin +(133) Sort +Input [2]: [n_nationkey#X, n_name#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(134) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(127) Project +(135) Project Output [5]: [l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_name#X] Input [7]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_nationkey#X, n_name#X] -(128) Exchange +(136) Exchange Input [5]: [l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_name#X] Arguments: hashpartitioning(c_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(129) Scan parquet +(137) Sort +Input [5]: [l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_name#X] +Arguments: [c_nationkey#X ASC NULLS FIRST], false, 0 + +(138) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey), Or(EqualTo(n_name,GERMANY),EqualTo(n_name,FRANCE))] ReadSchema: struct -(130) Filter +(139) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : (isnotnull(n_nationkey#X) AND ((n_name#X = GERMANY) OR (n_name#X = FRANCE))) -(131) Exchange +(140) Exchange Input [2]: [n_nationkey#X, n_name#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(132) ShuffledHashJoin +(141) Sort +Input [2]: [n_nationkey#X, n_name#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(142) SortMergeJoin Left keys [1]: [c_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: (((n_name#X = FRANCE) AND (n_name#X = GERMANY)) OR ((n_name#X = GERMANY) AND (n_name#X = FRANCE))) -(133) Project +(143) Project Output [4]: [n_name#X AS supp_nation#X, n_name#X AS cust_nation#X, year(l_shipdate#X) AS l_year#X, CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true) AS volume#X] Input [7]: [l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_name#X, n_nationkey#X, n_name#X] -(134) HashAggregate +(144) HashAggregate Input [4]: [supp_nation#X, cust_nation#X, l_year#X, volume#X] Keys [3]: [supp_nation#X, cust_nation#X, l_year#X] Functions [1]: [partial_sum(volume#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [5]: [supp_nation#X, cust_nation#X, l_year#X, sum#X, isEmpty#X] -(135) Exchange +(145) Exchange Input [5]: [supp_nation#X, cust_nation#X, l_year#X, sum#X, isEmpty#X] Arguments: hashpartitioning(supp_nation#X, cust_nation#X, l_year#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(136) HashAggregate +(146) HashAggregate Input [5]: [supp_nation#X, cust_nation#X, l_year#X, sum#X, isEmpty#X] Keys [3]: [supp_nation#X, cust_nation#X, l_year#X] Functions [1]: [sum(volume#X)] Aggregate Attributes [1]: [sum(volume#X)#X] Results [4]: [supp_nation#X, cust_nation#X, l_year#X, sum(volume#X)#X AS revenue#X] -(137) Exchange +(147) Exchange Input [4]: [supp_nation#X, cust_nation#X, l_year#X, revenue#X] Arguments: rangepartitioning(supp_nation#X ASC NULLS FIRST, cust_nation#X ASC NULLS FIRST, l_year#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(138) Sort +(148) Sort Input [4]: [supp_nation#X, cust_nation#X, l_year#X, revenue#X] Arguments: [supp_nation#X ASC NULLS FIRST, cust_nation#X ASC NULLS FIRST, l_year#X ASC NULLS FIRST], true, 0 -(139) AdaptiveSparkPlan +(149) AdaptiveSparkPlan Output [4]: [supp_nation#X, cust_nation#X, l_year#X, revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/8.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/8.txt index e9fdc420f128..8934d1a2e7a3 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/8.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/8.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (193) +AdaptiveSparkPlan (207) +- == Final Plan == VeloxColumnarToRowExec (141) +- ^ SortExecTransformer (139) @@ -110,57 +110,71 @@ AdaptiveSparkPlan (193) +- ^ FilterExecTransformer (113) +- ^ Scan parquet (112) +- == Initial Plan == - Sort (192) - +- Exchange (191) - +- HashAggregate (190) - +- Exchange (189) - +- HashAggregate (188) - +- Project (187) - +- ShuffledHashJoin Inner BuildRight (186) - :- Exchange (181) - : +- Project (180) - : +- ShuffledHashJoin Inner BuildRight (179) - : :- Exchange (175) - : : +- Project (174) - : : +- ShuffledHashJoin Inner BuildRight (173) - : : :- Exchange (169) - : : : +- Project (168) - : : : +- ShuffledHashJoin Inner BuildRight (167) - : : : :- Exchange (163) - : : : : +- Project (162) - : : : : +- ShuffledHashJoin Inner BuildRight (161) - : : : : :- Exchange (157) - : : : : : +- Project (156) - : : : : : +- ShuffledHashJoin Inner BuildRight (155) - : : : : : :- Exchange (151) - : : : : : : +- Project (150) - : : : : : : +- ShuffledHashJoin Inner BuildLeft (149) - : : : : : : :- Exchange (145) - : : : : : : : +- Project (144) - : : : : : : : +- Filter (143) - : : : : : : : +- Scan parquet (142) - : : : : : : +- Exchange (148) - : : : : : : +- Filter (147) - : : : : : : +- Scan parquet (146) - : : : : : +- Exchange (154) - : : : : : +- Filter (153) - : : : : : +- Scan parquet (152) - : : : : +- Exchange (160) - : : : : +- Filter (159) - : : : : +- Scan parquet (158) - : : : +- Exchange (166) - : : : +- Filter (165) - : : : +- Scan parquet (164) - : : +- Exchange (172) - : : +- Filter (171) - : : +- Scan parquet (170) - : +- Exchange (178) - : +- Filter (177) - : +- Scan parquet (176) - +- Exchange (185) - +- Project (184) - +- Filter (183) - +- Scan parquet (182) + Sort (206) + +- Exchange (205) + +- HashAggregate (204) + +- Exchange (203) + +- HashAggregate (202) + +- Project (201) + +- SortMergeJoin Inner (200) + :- Sort (194) + : +- Exchange (193) + : +- Project (192) + : +- SortMergeJoin Inner (191) + : :- Sort (186) + : : +- Exchange (185) + : : +- Project (184) + : : +- SortMergeJoin Inner (183) + : : :- Sort (178) + : : : +- Exchange (177) + : : : +- Project (176) + : : : +- SortMergeJoin Inner (175) + : : : :- Sort (170) + : : : : +- Exchange (169) + : : : : +- Project (168) + : : : : +- SortMergeJoin Inner (167) + : : : : :- Sort (162) + : : : : : +- Exchange (161) + : : : : : +- Project (160) + : : : : : +- SortMergeJoin Inner (159) + : : : : : :- Sort (154) + : : : : : : +- Exchange (153) + : : : : : : +- Project (152) + : : : : : : +- SortMergeJoin Inner (151) + : : : : : : :- Sort (146) + : : : : : : : +- Exchange (145) + : : : : : : : +- Project (144) + : : : : : : : +- Filter (143) + : : : : : : : +- Scan parquet (142) + : : : : : : +- Sort (150) + : : : : : : +- Exchange (149) + : : : : : : +- Filter (148) + : : : : : : +- Scan parquet (147) + : : : : : +- Sort (158) + : : : : : +- Exchange (157) + : : : : : +- Filter (156) + : : : : : +- Scan parquet (155) + : : : : +- Sort (166) + : : : : +- Exchange (165) + : : : : +- Filter (164) + : : : : +- Scan parquet (163) + : : : +- Sort (174) + : : : +- Exchange (173) + : : : +- Filter (172) + : : : +- Scan parquet (171) + : : +- Sort (182) + : : +- Exchange (181) + : : +- Filter (180) + : : +- Scan parquet (179) + : +- Sort (190) + : +- Exchange (189) + : +- Filter (188) + : +- Scan parquet (187) + +- Sort (199) + +- Exchange (198) + +- Project (197) + +- Filter (196) + +- Scan parquet (195) (1) Scan parquet @@ -750,228 +764,284 @@ Input [2]: [p_partkey#X, p_type#X] Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(146) Scan parquet +(146) Sort +Input [1]: [p_partkey#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(147) Scan parquet Output [5]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_partkey), IsNotNull(l_suppkey), IsNotNull(l_orderkey)] ReadSchema: struct -(147) Filter +(148) Filter Input [5]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Condition : ((isnotnull(l_partkey#X) AND isnotnull(l_suppkey#X)) AND isnotnull(l_orderkey#X)) -(148) Exchange +(149) Exchange Input [5]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(149) ShuffledHashJoin +(150) Sort +Input [5]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(151) SortMergeJoin Left keys [1]: [p_partkey#X] Right keys [1]: [l_partkey#X] Join condition: None -(150) Project +(152) Project Output [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Input [6]: [p_partkey#X, l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] -(151) Exchange +(153) Exchange Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(152) Scan parquet +(154) Sort +Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_suppkey#X ASC NULLS FIRST], false, 0 + +(155) Scan parquet Output [2]: [s_suppkey#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct -(153) Filter +(156) Filter Input [2]: [s_suppkey#X, s_nationkey#X] Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) -(154) Exchange +(157) Exchange Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(155) ShuffledHashJoin +(158) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(159) SortMergeJoin Left keys [1]: [l_suppkey#X] Right keys [1]: [s_suppkey#X] Join condition: None -(156) Project +(160) Project Output [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, s_nationkey#X] Input [6]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, s_suppkey#X, s_nationkey#X] -(157) Exchange +(161) Exchange Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, s_nationkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(158) Scan parquet +(162) Sort +Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, s_nationkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(163) Scan parquet Output [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderdate), GreaterThanOrEqual(o_orderdate,1995-01-01), LessThanOrEqual(o_orderdate,1996-12-31), IsNotNull(o_orderkey), IsNotNull(o_custkey)] ReadSchema: struct -(159) Filter +(164) Filter Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Condition : ((((isnotnull(o_orderdate#X) AND (o_orderdate#X >= 1995-01-01)) AND (o_orderdate#X <= 1996-12-31)) AND isnotnull(o_orderkey#X)) AND isnotnull(o_custkey#X)) -(160) Exchange +(165) Exchange Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(161) ShuffledHashJoin +(166) Sort +Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(167) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [o_orderkey#X] Join condition: None -(162) Project +(168) Project Output [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_custkey#X, o_orderdate#X] Input [7]: [l_orderkey#X, l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderkey#X, o_custkey#X, o_orderdate#X] -(163) Exchange +(169) Exchange Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_custkey#X, o_orderdate#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(164) Scan parquet +(170) Sort +Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_custkey#X, o_orderdate#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(171) Scan parquet Output [2]: [c_custkey#X, c_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(c_custkey), IsNotNull(c_nationkey)] ReadSchema: struct -(165) Filter +(172) Filter Input [2]: [c_custkey#X, c_nationkey#X] Condition : (isnotnull(c_custkey#X) AND isnotnull(c_nationkey#X)) -(166) Exchange +(173) Exchange Input [2]: [c_custkey#X, c_nationkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(167) ShuffledHashJoin +(174) Sort +Input [2]: [c_custkey#X, c_nationkey#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(175) SortMergeJoin Left keys [1]: [o_custkey#X] Right keys [1]: [c_custkey#X] Join condition: None -(168) Project +(176) Project Output [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, c_nationkey#X] Input [7]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_custkey#X, o_orderdate#X, c_custkey#X, c_nationkey#X] -(169) Exchange +(177) Exchange Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, c_nationkey#X] Arguments: hashpartitioning(c_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(170) Scan parquet +(178) Sort +Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, c_nationkey#X] +Arguments: [c_nationkey#X ASC NULLS FIRST], false, 0 + +(179) Scan parquet Output [2]: [n_nationkey#X, n_regionkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey), IsNotNull(n_regionkey)] ReadSchema: struct -(171) Filter +(180) Filter Input [2]: [n_nationkey#X, n_regionkey#X] Condition : (isnotnull(n_nationkey#X) AND isnotnull(n_regionkey#X)) -(172) Exchange +(181) Exchange Input [2]: [n_nationkey#X, n_regionkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(173) ShuffledHashJoin +(182) Sort +Input [2]: [n_nationkey#X, n_regionkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(183) SortMergeJoin Left keys [1]: [c_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(174) Project +(184) Project Output [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, n_regionkey#X] Input [7]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, c_nationkey#X, n_nationkey#X, n_regionkey#X] -(175) Exchange +(185) Exchange Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, n_regionkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(176) Scan parquet +(186) Sort +Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, n_regionkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(187) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey)] ReadSchema: struct -(177) Filter +(188) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : isnotnull(n_nationkey#X) -(178) Exchange +(189) Exchange Input [2]: [n_nationkey#X, n_name#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(179) ShuffledHashJoin +(190) Sort +Input [2]: [n_nationkey#X, n_name#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(191) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(180) Project +(192) Project Output [5]: [l_extendedprice#X, l_discount#X, o_orderdate#X, n_regionkey#X, n_name#X] Input [7]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, n_regionkey#X, n_nationkey#X, n_name#X] -(181) Exchange +(193) Exchange Input [5]: [l_extendedprice#X, l_discount#X, o_orderdate#X, n_regionkey#X, n_name#X] Arguments: hashpartitioning(n_regionkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(182) Scan parquet +(194) Sort +Input [5]: [l_extendedprice#X, l_discount#X, o_orderdate#X, n_regionkey#X, n_name#X] +Arguments: [n_regionkey#X ASC NULLS FIRST], false, 0 + +(195) Scan parquet Output [2]: [r_regionkey#X, r_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(r_name), EqualTo(r_name,AMERICA), IsNotNull(r_regionkey)] ReadSchema: struct -(183) Filter +(196) Filter Input [2]: [r_regionkey#X, r_name#X] Condition : ((isnotnull(r_name#X) AND (r_name#X = AMERICA)) AND isnotnull(r_regionkey#X)) -(184) Project +(197) Project Output [1]: [r_regionkey#X] Input [2]: [r_regionkey#X, r_name#X] -(185) Exchange +(198) Exchange Input [1]: [r_regionkey#X] Arguments: hashpartitioning(r_regionkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(186) ShuffledHashJoin +(199) Sort +Input [1]: [r_regionkey#X] +Arguments: [r_regionkey#X ASC NULLS FIRST], false, 0 + +(200) SortMergeJoin Left keys [1]: [n_regionkey#X] Right keys [1]: [r_regionkey#X] Join condition: None -(187) Project +(201) Project Output [3]: [year(o_orderdate#X) AS o_year#X, CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true) AS volume#X, n_name#X AS nation#X] Input [6]: [l_extendedprice#X, l_discount#X, o_orderdate#X, n_regionkey#X, n_name#X, r_regionkey#X] -(188) HashAggregate +(202) HashAggregate Input [3]: [o_year#X, volume#X, nation#X] Keys [1]: [o_year#X] Functions [2]: [partial_sum(CASE WHEN (nation#X = BRAZIL) THEN volume#X ELSE 0.0000 END), partial_sum(volume#X)] Aggregate Attributes [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] Results [5]: [o_year#X, sum#X, isEmpty#X, sum#X, isEmpty#X] -(189) Exchange +(203) Exchange Input [5]: [o_year#X, sum#X, isEmpty#X, sum#X, isEmpty#X] Arguments: hashpartitioning(o_year#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(190) HashAggregate +(204) HashAggregate Input [5]: [o_year#X, sum#X, isEmpty#X, sum#X, isEmpty#X] Keys [1]: [o_year#X] Functions [2]: [sum(CASE WHEN (nation#X = BRAZIL) THEN volume#X ELSE 0.0000 END), sum(volume#X)] Aggregate Attributes [2]: [sum(CASE WHEN (nation#X = BRAZIL) THEN volume#X ELSE 0.0000 END)#X, sum(volume#X)#X] Results [2]: [o_year#X, CheckOverflow((promote_precision(sum(CASE WHEN (nation#X = BRAZIL) THEN volume#X ELSE 0.0000 END)#X) / promote_precision(sum(volume#X)#X)), DecimalType(38,6), true) AS mkt_share#X] -(191) Exchange +(205) Exchange Input [2]: [o_year#X, mkt_share#X] Arguments: rangepartitioning(o_year#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(192) Sort +(206) Sort Input [2]: [o_year#X, mkt_share#X] Arguments: [o_year#X ASC NULLS FIRST], true, 0 -(193) AdaptiveSparkPlan +(207) AdaptiveSparkPlan Output [2]: [o_year#X, mkt_share#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/9.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/9.txt index d6bc308a9c2a..cb207c0800c3 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/9.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark32/9.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (145) +AdaptiveSparkPlan (155) +- == Final Plan == VeloxColumnarToRowExec (106) +- ^ SortExecTransformer (104) @@ -83,44 +83,54 @@ AdaptiveSparkPlan (145) +- ^ FilterExecTransformer (79) +- ^ Scan parquet (78) +- == Initial Plan == - Sort (144) - +- Exchange (143) - +- HashAggregate (142) - +- Exchange (141) - +- HashAggregate (140) - +- Project (139) - +- ShuffledHashJoin Inner BuildRight (138) - :- Exchange (134) - : +- Project (133) - : +- ShuffledHashJoin Inner BuildRight (132) - : :- Exchange (128) - : : +- Project (127) - : : +- ShuffledHashJoin Inner BuildRight (126) - : : :- Exchange (122) - : : : +- Project (121) - : : : +- ShuffledHashJoin Inner BuildRight (120) - : : : :- Exchange (116) - : : : : +- Project (115) - : : : : +- ShuffledHashJoin Inner BuildLeft (114) - : : : : :- Exchange (110) - : : : : : +- Project (109) - : : : : : +- Filter (108) - : : : : : +- Scan parquet (107) - : : : : +- Exchange (113) - : : : : +- Filter (112) - : : : : +- Scan parquet (111) - : : : +- Exchange (119) - : : : +- Filter (118) - : : : +- Scan parquet (117) - : : +- Exchange (125) - : : +- Filter (124) - : : +- Scan parquet (123) - : +- Exchange (131) - : +- Filter (130) - : +- Scan parquet (129) - +- Exchange (137) - +- Filter (136) - +- Scan parquet (135) + Sort (154) + +- Exchange (153) + +- HashAggregate (152) + +- Exchange (151) + +- HashAggregate (150) + +- Project (149) + +- SortMergeJoin Inner (148) + :- Sort (143) + : +- Exchange (142) + : +- Project (141) + : +- SortMergeJoin Inner (140) + : :- Sort (135) + : : +- Exchange (134) + : : +- Project (133) + : : +- SortMergeJoin Inner (132) + : : :- Sort (127) + : : : +- Exchange (126) + : : : +- Project (125) + : : : +- SortMergeJoin Inner (124) + : : : :- Sort (119) + : : : : +- Exchange (118) + : : : : +- Project (117) + : : : : +- SortMergeJoin Inner (116) + : : : : :- Sort (111) + : : : : : +- Exchange (110) + : : : : : +- Project (109) + : : : : : +- Filter (108) + : : : : : +- Scan parquet (107) + : : : : +- Sort (115) + : : : : +- Exchange (114) + : : : : +- Filter (113) + : : : : +- Scan parquet (112) + : : : +- Sort (123) + : : : +- Exchange (122) + : : : +- Filter (121) + : : : +- Scan parquet (120) + : : +- Sort (131) + : : +- Exchange (130) + : : +- Filter (129) + : : +- Scan parquet (128) + : +- Sort (139) + : +- Exchange (138) + : +- Filter (137) + : +- Scan parquet (136) + +- Sort (147) + +- Exchange (146) + +- Filter (145) + +- Scan parquet (144) (1) Scan parquet @@ -570,168 +580,208 @@ Input [2]: [p_partkey#X, p_name#X] Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(111) Scan parquet +(111) Sort +Input [1]: [p_partkey#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(112) Scan parquet Output [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_partkey), IsNotNull(l_suppkey), IsNotNull(l_orderkey)] ReadSchema: struct -(112) Filter +(113) Filter Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Condition : ((isnotnull(l_partkey#X) AND isnotnull(l_suppkey#X)) AND isnotnull(l_orderkey#X)) -(113) Exchange +(114) Exchange Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(114) ShuffledHashJoin +(115) Sort +Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(116) SortMergeJoin Left keys [1]: [p_partkey#X] Right keys [1]: [l_partkey#X] Join condition: None -(115) Project +(117) Project Output [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Input [7]: [p_partkey#X, l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] -(116) Exchange +(118) Exchange Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(117) Scan parquet +(119) Sort +Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] +Arguments: [l_suppkey#X ASC NULLS FIRST], false, 0 + +(120) Scan parquet Output [2]: [s_suppkey#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct -(118) Filter +(121) Filter Input [2]: [s_suppkey#X, s_nationkey#X] Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) -(119) Exchange +(122) Exchange Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(120) ShuffledHashJoin +(123) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(124) SortMergeJoin Left keys [1]: [l_suppkey#X] Right keys [1]: [s_suppkey#X] Join condition: None -(121) Project +(125) Project Output [7]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X] Input [8]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_suppkey#X, s_nationkey#X] -(122) Exchange +(126) Exchange Input [7]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X] Arguments: hashpartitioning(l_suppkey#X, l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(123) Scan parquet +(127) Sort +Input [7]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X] +Arguments: [l_suppkey#X ASC NULLS FIRST, l_partkey#X ASC NULLS FIRST], false, 0 + +(128) Scan parquet Output [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(ps_suppkey), IsNotNull(ps_partkey)] ReadSchema: struct -(124) Filter +(129) Filter Input [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] Condition : (isnotnull(ps_suppkey#X) AND isnotnull(ps_partkey#X)) -(125) Exchange +(130) Exchange Input [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] Arguments: hashpartitioning(ps_suppkey#X, ps_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(126) ShuffledHashJoin +(131) Sort +Input [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] +Arguments: [ps_suppkey#X ASC NULLS FIRST, ps_partkey#X ASC NULLS FIRST], false, 0 + +(132) SortMergeJoin Left keys [2]: [l_suppkey#X, l_partkey#X] Right keys [2]: [ps_suppkey#X, ps_partkey#X] Join condition: None -(127) Project +(133) Project Output [6]: [l_orderkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X] Input [10]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_partkey#X, ps_suppkey#X, ps_supplycost#X] -(128) Exchange +(134) Exchange Input [6]: [l_orderkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(129) Scan parquet +(135) Sort +Input [6]: [l_orderkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(136) Scan parquet Output [2]: [o_orderkey#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderkey)] ReadSchema: struct -(130) Filter +(137) Filter Input [2]: [o_orderkey#X, o_orderdate#X] Condition : isnotnull(o_orderkey#X) -(131) Exchange +(138) Exchange Input [2]: [o_orderkey#X, o_orderdate#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(132) ShuffledHashJoin +(139) Sort +Input [2]: [o_orderkey#X, o_orderdate#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(140) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [o_orderkey#X] Join condition: None -(133) Project +(141) Project Output [6]: [l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderdate#X] Input [8]: [l_orderkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderkey#X, o_orderdate#X] -(134) Exchange +(142) Exchange Input [6]: [l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderdate#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(135) Scan parquet +(143) Sort +Input [6]: [l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderdate#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(144) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey)] ReadSchema: struct -(136) Filter +(145) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : isnotnull(n_nationkey#X) -(137) Exchange +(146) Exchange Input [2]: [n_nationkey#X, n_name#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(138) ShuffledHashJoin +(147) Sort +Input [2]: [n_nationkey#X, n_name#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(148) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(139) Project +(149) Project Output [3]: [n_name#X AS nation#X, year(o_orderdate#X) AS o_year#X, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2), true))), DecimalType(26,4), true) as decimal(27,4))) - promote_precision(cast(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(l_quantity#X)), DecimalType(25,4), true) as decimal(27,4)))), DecimalType(27,4), true) AS amount#X] Input [8]: [l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderdate#X, n_nationkey#X, n_name#X] -(140) HashAggregate +(150) HashAggregate Input [3]: [nation#X, o_year#X, amount#X] Keys [2]: [nation#X, o_year#X] Functions [1]: [partial_sum(amount#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [4]: [nation#X, o_year#X, sum#X, isEmpty#X] -(141) Exchange +(151) Exchange Input [4]: [nation#X, o_year#X, sum#X, isEmpty#X] Arguments: hashpartitioning(nation#X, o_year#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(142) HashAggregate +(152) HashAggregate Input [4]: [nation#X, o_year#X, sum#X, isEmpty#X] Keys [2]: [nation#X, o_year#X] Functions [1]: [sum(amount#X)] Aggregate Attributes [1]: [sum(amount#X)#X] Results [3]: [nation#X, o_year#X, sum(amount#X)#X AS sum_profit#X] -(143) Exchange +(153) Exchange Input [3]: [nation#X, o_year#X, sum_profit#X] Arguments: rangepartitioning(nation#X ASC NULLS FIRST, o_year#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(144) Sort +(154) Sort Input [3]: [nation#X, o_year#X, sum_profit#X] Arguments: [nation#X ASC NULLS FIRST, o_year#X DESC NULLS LAST], true, 0 -(145) AdaptiveSparkPlan +(155) AdaptiveSparkPlan Output [3]: [nation#X, o_year#X, sum_profit#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/10.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/10.txt index fcb13291c838..c295515b8a6c 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/10.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/10.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (94) +AdaptiveSparkPlan (100) +- == Final Plan == VeloxColumnarToRowExec (67) +- TakeOrderedAndProjectExecTransformer (66) @@ -54,32 +54,38 @@ AdaptiveSparkPlan (94) +- ^ FilterExecTransformer (45) +- ^ Scan parquet (44) +- == Initial Plan == - TakeOrderedAndProject (93) - +- HashAggregate (92) - +- Exchange (91) - +- HashAggregate (90) - +- Project (89) - +- ShuffledHashJoin Inner BuildRight (88) - :- Exchange (84) - : +- Project (83) - : +- ShuffledHashJoin Inner BuildRight (82) - : :- Exchange (77) - : : +- Project (76) - : : +- ShuffledHashJoin Inner BuildRight (75) - : : :- Exchange (70) - : : : +- Filter (69) - : : : +- Scan parquet (68) - : : +- Exchange (74) - : : +- Project (73) - : : +- Filter (72) - : : +- Scan parquet (71) - : +- Exchange (81) - : +- Project (80) - : +- Filter (79) - : +- Scan parquet (78) - +- Exchange (87) - +- Filter (86) - +- Scan parquet (85) + TakeOrderedAndProject (99) + +- HashAggregate (98) + +- Exchange (97) + +- HashAggregate (96) + +- Project (95) + +- SortMergeJoin Inner (94) + :- Sort (89) + : +- Exchange (88) + : +- Project (87) + : +- SortMergeJoin Inner (86) + : :- Sort (80) + : : +- Exchange (79) + : : +- Project (78) + : : +- SortMergeJoin Inner (77) + : : :- Sort (71) + : : : +- Exchange (70) + : : : +- Filter (69) + : : : +- Scan parquet (68) + : : +- Sort (76) + : : +- Exchange (75) + : : +- Project (74) + : : +- Filter (73) + : : +- Scan parquet (72) + : +- Sort (85) + : +- Exchange (84) + : +- Project (83) + : +- Filter (82) + : +- Scan parquet (81) + +- Sort (93) + +- Exchange (92) + +- Filter (91) + +- Scan parquet (90) (1) Scan parquet @@ -371,116 +377,140 @@ Condition : (isnotnull(c_custkey#X) AND isnotnull(c_nationkey#X)) Input [7]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(71) Scan parquet +(71) Sort +Input [7]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(72) Scan parquet Output [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderdate), GreaterThanOrEqual(o_orderdate,1993-10-01), LessThan(o_orderdate,1994-01-01), IsNotNull(o_custkey), IsNotNull(o_orderkey)] ReadSchema: struct -(72) Filter +(73) Filter Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Condition : ((((isnotnull(o_orderdate#X) AND (o_orderdate#X >= 1993-10-01)) AND (o_orderdate#X < 1994-01-01)) AND isnotnull(o_custkey#X)) AND isnotnull(o_orderkey#X)) -(73) Project +(74) Project Output [2]: [o_orderkey#X, o_custkey#X] Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] -(74) Exchange +(75) Exchange Input [2]: [o_orderkey#X, o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(75) ShuffledHashJoin +(76) Sort +Input [2]: [o_orderkey#X, o_custkey#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(77) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join condition: None -(76) Project +(78) Project Output [8]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X] Input [9]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X, o_custkey#X] -(77) Exchange +(79) Exchange Input [8]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(78) Scan parquet +(80) Sort +Input [8]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(81) Scan parquet Output [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_returnflag#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_returnflag), EqualTo(l_returnflag,R), IsNotNull(l_orderkey)] ReadSchema: struct -(79) Filter +(82) Filter Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_returnflag#X] Condition : ((isnotnull(l_returnflag#X) AND (l_returnflag#X = R)) AND isnotnull(l_orderkey#X)) -(80) Project +(83) Project Output [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_returnflag#X] -(81) Exchange +(84) Exchange Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(82) ShuffledHashJoin +(85) Sort +Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(86) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(83) Project +(87) Project Output [9]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X] Input [11]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X] -(84) Exchange +(88) Exchange Input [9]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(c_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(85) Scan parquet +(89) Sort +Input [9]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X] +Arguments: [c_nationkey#X ASC NULLS FIRST], false, 0 + +(90) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey)] ReadSchema: struct -(86) Filter +(91) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : isnotnull(n_nationkey#X) -(87) Exchange +(92) Exchange Input [2]: [n_nationkey#X, n_name#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(88) ShuffledHashJoin +(93) Sort +Input [2]: [n_nationkey#X, n_name#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(94) SortMergeJoin Left keys [1]: [c_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(89) Project +(95) Project Output [9]: [c_custkey#X, c_name#X, c_address#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X, n_name#X] Input [11]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X, n_nationkey#X, n_name#X] -(90) HashAggregate +(96) HashAggregate Input [9]: [c_custkey#X, c_name#X, c_address#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X, n_name#X] Keys [7]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [9]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, sum#X, isEmpty#X] -(91) Exchange +(97) Exchange Input [9]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, sum#X, isEmpty#X] Arguments: hashpartitioning(c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(92) HashAggregate +(98) HashAggregate Input [9]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, sum#X, isEmpty#X] Keys [7]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] Results [8]: [c_custkey#X, c_name#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X AS revenue#X, c_acctbal#X, n_name#X, c_address#X, c_phone#X, c_comment#X] -(93) TakeOrderedAndProject +(99) TakeOrderedAndProject Input [8]: [c_custkey#X, c_name#X, revenue#X, c_acctbal#X, n_name#X, c_address#X, c_phone#X, c_comment#X] Arguments: X, [revenue#X DESC NULLS LAST], [c_custkey#X, c_name#X, revenue#X, c_acctbal#X, n_name#X, c_address#X, c_phone#X, c_comment#X] -(94) AdaptiveSparkPlan +(100) AdaptiveSparkPlan Output [8]: [c_custkey#X, c_name#X, revenue#X, c_acctbal#X, n_name#X, c_address#X, c_phone#X, c_comment#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/11.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/11.txt index bbd20320b798..20bb486f3841 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/11.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/11.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (78) +AdaptiveSparkPlan (82) +- == Final Plan == VeloxColumnarToRowExec (56) +- ^ SortExecTransformer (54) @@ -45,27 +45,31 @@ AdaptiveSparkPlan (78) +- ^ FilterExecTransformer (28) +- ^ Scan parquet (27) +- == Initial Plan == - Sort (77) - +- Exchange (76) - +- Filter (75) - +- HashAggregate (74) - +- Exchange (73) - +- HashAggregate (72) - +- Project (71) - +- ShuffledHashJoin Inner BuildRight (70) - :- Exchange (65) - : +- Project (64) - : +- ShuffledHashJoin Inner BuildRight (63) - : :- Exchange (59) - : : +- Filter (58) - : : +- Scan parquet (57) - : +- Exchange (62) - : +- Filter (61) - : +- Scan parquet (60) - +- Exchange (69) - +- Project (68) - +- Filter (67) - +- Scan parquet (66) + Sort (81) + +- Exchange (80) + +- Filter (79) + +- HashAggregate (78) + +- Exchange (77) + +- HashAggregate (76) + +- Project (75) + +- SortMergeJoin Inner (74) + :- Sort (68) + : +- Exchange (67) + : +- Project (66) + : +- SortMergeJoin Inner (65) + : :- Sort (60) + : : +- Exchange (59) + : : +- Filter (58) + : : +- Scan parquet (57) + : +- Sort (64) + : +- Exchange (63) + : +- Filter (62) + : +- Scan parquet (61) + +- Sort (73) + +- Exchange (72) + +- Project (71) + +- Filter (70) + +- Scan parquet (69) (1) Scan parquet @@ -311,359 +315,395 @@ Condition : isnotnull(ps_suppkey#X) Input [4]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(60) Scan parquet +(60) Sort +Input [4]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] +Arguments: [ps_suppkey#X ASC NULLS FIRST], false, 0 + +(61) Scan parquet Output [2]: [s_suppkey#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct -(61) Filter +(62) Filter Input [2]: [s_suppkey#X, s_nationkey#X] Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) -(62) Exchange +(63) Exchange Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(63) ShuffledHashJoin +(64) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(65) SortMergeJoin Left keys [1]: [ps_suppkey#X] Right keys [1]: [s_suppkey#X] Join condition: None -(64) Project +(66) Project Output [4]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] Input [6]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X, s_suppkey#X, s_nationkey#X] -(65) Exchange +(67) Exchange Input [4]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(66) Scan parquet +(68) Sort +Input [4]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(69) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,GERMANY), IsNotNull(n_nationkey)] ReadSchema: struct -(67) Filter +(70) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = GERMANY)) AND isnotnull(n_nationkey#X)) -(68) Project +(71) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(69) Exchange +(72) Exchange Input [1]: [n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(70) ShuffledHashJoin +(73) Sort +Input [1]: [n_nationkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(74) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(71) Project +(75) Project Output [3]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X] Input [5]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X, n_nationkey#X] -(72) HashAggregate +(76) HashAggregate Input [3]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X] Keys [1]: [ps_partkey#X] Functions [1]: [partial_sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [ps_partkey#X, sum#X, isEmpty#X] -(73) Exchange +(77) Exchange Input [3]: [ps_partkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(ps_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(74) HashAggregate +(78) HashAggregate Input [3]: [ps_partkey#X, sum#X, isEmpty#X] Keys [1]: [ps_partkey#X] Functions [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))#X] Results [2]: [ps_partkey#X, sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))#X AS value#X] -(75) Filter +(79) Filter Input [2]: [ps_partkey#X, value#X] Condition : (isnotnull(value#X) AND (cast(value#X as decimal(38,6)) > Subquery subquery#X, [id=#X])) -(76) Exchange +(80) Exchange Input [2]: [ps_partkey#X, value#X] Arguments: rangepartitioning(value#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(77) Sort +(81) Sort Input [2]: [ps_partkey#X, value#X] Arguments: [value#X DESC NULLS LAST], true, 0 -(78) AdaptiveSparkPlan +(82) AdaptiveSparkPlan Output [2]: [ps_partkey#X, value#X] Arguments: isFinalPlan=true ===== Subqueries ===== Subquery:1 Hosting operator id = 47 Hosting Expression = Subquery subquery#X, [id=#X] -AdaptiveSparkPlan (128) +AdaptiveSparkPlan (136) +- == Final Plan == - VeloxColumnarToRowExec (110) - +- ^ ProjectExecTransformer (108) - +- ^ RegularHashAggregateExecTransformer (107) - +- ^ RegularHashAggregateExecTransformer (106) - +- ^ ProjectExecTransformer (105) - +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (104) - :- ^ InputIteratorTransformer (99) - : +- ShuffleQueryStage (97), Statistics(X) - : +- ColumnarExchange (96) - : +- VeloxAppendBatches (95) - : +- ^ ProjectExecTransformer (93) - : +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (92) - : :- ^ InputIteratorTransformer (87) - : : +- ShuffleQueryStage (85), Statistics(X) - : : +- ColumnarExchange (84) - : : +- VeloxAppendBatches (83) - : : +- ^ ProjectExecTransformer (81) - : : +- ^ FilterExecTransformer (80) - : : +- ^ Scan parquet (79) - : +- ^ InputIteratorTransformer (91) - : +- ShuffleQueryStage (89), Statistics(X) - : +- ReusedExchange (88) - +- ^ InputIteratorTransformer (103) - +- ShuffleQueryStage (101), Statistics(X) - +- ReusedExchange (100) + VeloxColumnarToRowExec (114) + +- ^ ProjectExecTransformer (112) + +- ^ RegularHashAggregateExecTransformer (111) + +- ^ RegularHashAggregateExecTransformer (110) + +- ^ ProjectExecTransformer (109) + +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (108) + :- ^ InputIteratorTransformer (103) + : +- ShuffleQueryStage (101), Statistics(X) + : +- ColumnarExchange (100) + : +- VeloxAppendBatches (99) + : +- ^ ProjectExecTransformer (97) + : +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (96) + : :- ^ InputIteratorTransformer (91) + : : +- ShuffleQueryStage (89), Statistics(X) + : : +- ColumnarExchange (88) + : : +- VeloxAppendBatches (87) + : : +- ^ ProjectExecTransformer (85) + : : +- ^ FilterExecTransformer (84) + : : +- ^ Scan parquet (83) + : +- ^ InputIteratorTransformer (95) + : +- ShuffleQueryStage (93), Statistics(X) + : +- ReusedExchange (92) + +- ^ InputIteratorTransformer (107) + +- ShuffleQueryStage (105), Statistics(X) + +- ReusedExchange (104) +- == Initial Plan == - HashAggregate (127) - +- HashAggregate (126) - +- Project (125) - +- ShuffledHashJoin Inner BuildRight (124) - :- Exchange (119) - : +- Project (118) - : +- ShuffledHashJoin Inner BuildRight (117) - : :- Exchange (113) - : : +- Filter (112) - : : +- Scan parquet (111) - : +- Exchange (116) - : +- Filter (115) - : +- Scan parquet (114) - +- Exchange (123) - +- Project (122) - +- Filter (121) - +- Scan parquet (120) - - -(79) Scan parquet + HashAggregate (135) + +- HashAggregate (134) + +- Project (133) + +- SortMergeJoin Inner (132) + :- Sort (126) + : +- Exchange (125) + : +- Project (124) + : +- SortMergeJoin Inner (123) + : :- Sort (118) + : : +- Exchange (117) + : : +- Filter (116) + : : +- Scan parquet (115) + : +- Sort (122) + : +- Exchange (121) + : +- Filter (120) + : +- Scan parquet (119) + +- Sort (131) + +- Exchange (130) + +- Project (129) + +- Filter (128) + +- Scan parquet (127) + + +(83) Scan parquet Output [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(ps_suppkey)] ReadSchema: struct -(80) FilterExecTransformer +(84) FilterExecTransformer Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: isnotnull(ps_suppkey#X) -(81) ProjectExecTransformer +(85) ProjectExecTransformer Output [4]: [hash(ps_suppkey#X, 42) AS hash_partition_key#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] -(82) WholeStageCodegenTransformer (X) +(86) WholeStageCodegenTransformer (X) Input [4]: [hash_partition_key#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: false -(83) VeloxAppendBatches +(87) VeloxAppendBatches Input [4]: [hash_partition_key#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: X -(84) ColumnarExchange +(88) ColumnarExchange Input [4]: [hash_partition_key#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [ps_suppkey#X, ps_availqty#X, ps_supplycost#X], [plan_id=X], [id=#X] -(85) ShuffleQueryStage +(89) ShuffleQueryStage Output [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: X -(86) InputAdapter +(90) InputAdapter Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] -(87) InputIteratorTransformer +(91) InputIteratorTransformer Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] -(88) ReusedExchange [Reuses operator id: 15] +(92) ReusedExchange [Reuses operator id: 15] Output [2]: [s_suppkey#X, s_nationkey#X] -(89) ShuffleQueryStage +(93) ShuffleQueryStage Output [2]: [s_suppkey#X, s_nationkey#X] Arguments: X -(90) InputAdapter +(94) InputAdapter Input [2]: [s_suppkey#X, s_nationkey#X] -(91) InputIteratorTransformer +(95) InputIteratorTransformer Input [2]: [s_suppkey#X, s_nationkey#X] -(92) ShuffledHashJoinExecTransformer +(96) ShuffledHashJoinExecTransformer Left keys [1]: [ps_suppkey#X] Right keys [1]: [s_suppkey#X] Join condition: None -(93) ProjectExecTransformer +(97) ProjectExecTransformer Output [4]: [hash(s_nationkey#X, 42) AS hash_partition_key#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] Input [5]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X, s_suppkey#X, s_nationkey#X] -(94) WholeStageCodegenTransformer (X) +(98) WholeStageCodegenTransformer (X) Input [4]: [hash_partition_key#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] Arguments: false -(95) VeloxAppendBatches +(99) VeloxAppendBatches Input [4]: [hash_partition_key#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] Arguments: X -(96) ColumnarExchange +(100) ColumnarExchange Input [4]: [hash_partition_key#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [ps_availqty#X, ps_supplycost#X, s_nationkey#X], [plan_id=X], [id=#X] -(97) ShuffleQueryStage +(101) ShuffleQueryStage Output [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] Arguments: X -(98) InputAdapter +(102) InputAdapter Input [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] -(99) InputIteratorTransformer +(103) InputIteratorTransformer Input [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] -(100) ReusedExchange [Reuses operator id: 32] +(104) ReusedExchange [Reuses operator id: 32] Output [1]: [n_nationkey#X] -(101) ShuffleQueryStage +(105) ShuffleQueryStage Output [1]: [n_nationkey#X] Arguments: X -(102) InputAdapter +(106) InputAdapter Input [1]: [n_nationkey#X] -(103) InputIteratorTransformer +(107) InputIteratorTransformer Input [1]: [n_nationkey#X] -(104) ShuffledHashJoinExecTransformer +(108) ShuffledHashJoinExecTransformer Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(105) ProjectExecTransformer +(109) ProjectExecTransformer Output [3]: [ps_availqty#X, ps_supplycost#X, CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)) AS _pre_X#X] Input [4]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X, n_nationkey#X] -(106) RegularHashAggregateExecTransformer +(110) RegularHashAggregateExecTransformer Input [3]: [ps_availqty#X, ps_supplycost#X, _pre_X#X] Keys: [] Functions [1]: [partial_sum(_pre_X#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [2]: [sum#X, isEmpty#X] -(107) RegularHashAggregateExecTransformer +(111) RegularHashAggregateExecTransformer Input [2]: [sum#X, isEmpty#X] Keys: [] Functions [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))#X] Results [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))#X] -(108) ProjectExecTransformer +(112) ProjectExecTransformer Output [1]: [CheckOverflow((promote_precision(cast(sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))#X as decimal(38,10))) * 0.0001000000), DecimalType(38,6)) AS (sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] Input [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))#X] -(109) WholeStageCodegenTransformer (X) +(113) WholeStageCodegenTransformer (X) Input [1]: [(sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] Arguments: false -(110) VeloxColumnarToRowExec +(114) VeloxColumnarToRowExec Input [1]: [(sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] -(111) Scan parquet +(115) Scan parquet Output [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(ps_suppkey)] ReadSchema: struct -(112) Filter +(116) Filter Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Condition : isnotnull(ps_suppkey#X) -(113) Exchange +(117) Exchange Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(114) Scan parquet +(118) Sort +Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] +Arguments: [ps_suppkey#X ASC NULLS FIRST], false, 0 + +(119) Scan parquet Output [2]: [s_suppkey#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct -(115) Filter +(120) Filter Input [2]: [s_suppkey#X, s_nationkey#X] Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) -(116) Exchange +(121) Exchange Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(117) ShuffledHashJoin +(122) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(123) SortMergeJoin Left keys [1]: [ps_suppkey#X] Right keys [1]: [s_suppkey#X] Join condition: None -(118) Project +(124) Project Output [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] Input [5]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X, s_suppkey#X, s_nationkey#X] -(119) Exchange +(125) Exchange Input [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(120) Scan parquet +(126) Sort +Input [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(127) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,GERMANY), IsNotNull(n_nationkey)] ReadSchema: struct -(121) Filter +(128) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = GERMANY)) AND isnotnull(n_nationkey#X)) -(122) Project +(129) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(123) Exchange +(130) Exchange Input [1]: [n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(124) ShuffledHashJoin +(131) Sort +Input [1]: [n_nationkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(132) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(125) Project +(133) Project Output [2]: [ps_availqty#X, ps_supplycost#X] Input [4]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X, n_nationkey#X] -(126) HashAggregate +(134) HashAggregate Input [2]: [ps_availqty#X, ps_supplycost#X] Keys: [] Functions [1]: [partial_sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [2]: [sum#X, isEmpty#X] -(127) HashAggregate +(135) HashAggregate Input [2]: [sum#X, isEmpty#X] Keys: [] Functions [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))#X] Results [1]: [CheckOverflow((promote_precision(cast(sum(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(cast(ps_availqty#X as decimal(12,2)))), DecimalType(23,2)))#X as decimal(38,10))) * 0.0001000000), DecimalType(38,6)) AS (sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] -(128) AdaptiveSparkPlan +(136) AdaptiveSparkPlan Output [1]: [(sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/12.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/12.txt index 194b60bb7713..1b36d274aab4 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/12.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/12.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (53) +AdaptiveSparkPlan (55) +- == Final Plan == VeloxColumnarToRowExec (38) +- ^ SortExecTransformer (36) @@ -31,20 +31,22 @@ AdaptiveSparkPlan (53) +- ^ FilterExecTransformer (11) +- ^ Scan parquet (10) +- == Initial Plan == - Sort (52) - +- Exchange (51) - +- HashAggregate (50) - +- Exchange (49) - +- HashAggregate (48) - +- Project (47) - +- ShuffledHashJoin Inner BuildLeft (46) - :- Exchange (41) - : +- Filter (40) - : +- Scan parquet (39) - +- Exchange (45) - +- Project (44) - +- Filter (43) - +- Scan parquet (42) + Sort (54) + +- Exchange (53) + +- HashAggregate (52) + +- Exchange (51) + +- HashAggregate (50) + +- Project (49) + +- SortMergeJoin Inner (48) + :- Sort (42) + : +- Exchange (41) + : +- Filter (40) + : +- Scan parquet (39) + +- Sort (47) + +- Exchange (46) + +- Project (45) + +- Filter (44) + +- Scan parquet (43) (1) Scan parquet @@ -218,60 +220,68 @@ Condition : isnotnull(o_orderkey#X) Input [2]: [o_orderkey#X, o_orderpriority#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(42) Scan parquet +(42) Sort +Input [2]: [o_orderkey#X, o_orderpriority#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(43) Scan parquet Output [5]: [l_orderkey#X, l_shipdate#X, l_commitdate#X, l_receiptdate#X, l_shipmode#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_commitdate), IsNotNull(l_receiptdate), IsNotNull(l_shipdate), In(l_shipmode, [MAIL,SHIP]), GreaterThanOrEqual(l_receiptdate,1994-01-01), LessThan(l_receiptdate,1995-01-01), IsNotNull(l_orderkey)] ReadSchema: struct -(43) Filter +(44) Filter Input [5]: [l_orderkey#X, l_shipdate#X, l_commitdate#X, l_receiptdate#X, l_shipmode#X] Condition : ((((((((isnotnull(l_commitdate#X) AND isnotnull(l_receiptdate#X)) AND isnotnull(l_shipdate#X)) AND l_shipmode#X IN (MAIL,SHIP)) AND (l_commitdate#X < l_receiptdate#X)) AND (l_shipdate#X < l_commitdate#X)) AND (l_receiptdate#X >= 1994-01-01)) AND (l_receiptdate#X < 1995-01-01)) AND isnotnull(l_orderkey#X)) -(44) Project +(45) Project Output [2]: [l_orderkey#X, l_shipmode#X] Input [5]: [l_orderkey#X, l_shipdate#X, l_commitdate#X, l_receiptdate#X, l_shipmode#X] -(45) Exchange +(46) Exchange Input [2]: [l_orderkey#X, l_shipmode#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(46) ShuffledHashJoin +(47) Sort +Input [2]: [l_orderkey#X, l_shipmode#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(48) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(47) Project +(49) Project Output [2]: [o_orderpriority#X, l_shipmode#X] Input [4]: [o_orderkey#X, o_orderpriority#X, l_orderkey#X, l_shipmode#X] -(48) HashAggregate +(50) HashAggregate Input [2]: [o_orderpriority#X, l_shipmode#X] Keys [1]: [l_shipmode#X] Functions [2]: [partial_sum(CASE WHEN ((o_orderpriority#X = 1-URGENT) OR (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (NOT (o_orderpriority#X = 1-URGENT) AND NOT (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)] Aggregate Attributes [2]: [sum#X, sum#X] Results [3]: [l_shipmode#X, sum#X, sum#X] -(49) Exchange +(51) Exchange Input [3]: [l_shipmode#X, sum#X, sum#X] Arguments: hashpartitioning(l_shipmode#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(50) HashAggregate +(52) HashAggregate Input [3]: [l_shipmode#X, sum#X, sum#X] Keys [1]: [l_shipmode#X] Functions [2]: [sum(CASE WHEN ((o_orderpriority#X = 1-URGENT) OR (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END), sum(CASE WHEN (NOT (o_orderpriority#X = 1-URGENT) AND NOT (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)] Aggregate Attributes [2]: [sum(CASE WHEN ((o_orderpriority#X = 1-URGENT) OR (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)#X, sum(CASE WHEN (NOT (o_orderpriority#X = 1-URGENT) AND NOT (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)#X] Results [3]: [l_shipmode#X, sum(CASE WHEN ((o_orderpriority#X = 1-URGENT) OR (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)#X AS high_line_count#X, sum(CASE WHEN (NOT (o_orderpriority#X = 1-URGENT) AND NOT (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)#X AS low_line_count#X] -(51) Exchange +(53) Exchange Input [3]: [l_shipmode#X, high_line_count#X, low_line_count#X] Arguments: rangepartitioning(l_shipmode#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(52) Sort +(54) Sort Input [3]: [l_shipmode#X, high_line_count#X, low_line_count#X] Arguments: [l_shipmode#X ASC NULLS FIRST], true, 0 -(53) AdaptiveSparkPlan +(55) AdaptiveSparkPlan Output [3]: [l_shipmode#X, high_line_count#X, low_line_count#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/13.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/13.txt index a9d4e199cfd2..83ec9aeda98a 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/13.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/13.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (56) +AdaptiveSparkPlan (58) +- == Final Plan == VeloxColumnarToRowExec (40) +- ^ SortExecTransformer (38) @@ -33,21 +33,23 @@ AdaptiveSparkPlan (56) +- ^ FilterExecTransformer (10) +- ^ Scan parquet (9) +- == Initial Plan == - Sort (55) - +- Exchange (54) - +- HashAggregate (53) - +- Exchange (52) - +- HashAggregate (51) - +- HashAggregate (50) - +- HashAggregate (49) - +- Project (48) - +- ShuffledHashJoin LeftOuter BuildRight (47) - :- Exchange (42) - : +- Scan parquet (41) - +- Exchange (46) - +- Project (45) - +- Filter (44) - +- Scan parquet (43) + Sort (57) + +- Exchange (56) + +- HashAggregate (55) + +- Exchange (54) + +- HashAggregate (53) + +- HashAggregate (52) + +- HashAggregate (51) + +- Project (50) + +- SortMergeJoin LeftOuter (49) + :- Sort (43) + : +- Exchange (42) + : +- Scan parquet (41) + +- Sort (48) + +- Exchange (47) + +- Project (46) + +- Filter (45) + +- Scan parquet (44) (1) Scan parquet @@ -229,74 +231,82 @@ ReadSchema: struct Input [1]: [c_custkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(43) Scan parquet +(43) Sort +Input [1]: [c_custkey#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(44) Scan parquet Output [3]: [o_orderkey#X, o_custkey#X, o_comment#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_comment), IsNotNull(o_custkey)] ReadSchema: struct -(44) Filter +(45) Filter Input [3]: [o_orderkey#X, o_custkey#X, o_comment#X] Condition : ((isnotnull(o_comment#X) AND NOT o_comment#X LIKE %special%requests%) AND isnotnull(o_custkey#X)) -(45) Project +(46) Project Output [2]: [o_orderkey#X, o_custkey#X] Input [3]: [o_orderkey#X, o_custkey#X, o_comment#X] -(46) Exchange +(47) Exchange Input [2]: [o_orderkey#X, o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(47) ShuffledHashJoin +(48) Sort +Input [2]: [o_orderkey#X, o_custkey#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(49) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join condition: None -(48) Project +(50) Project Output [2]: [c_custkey#X, o_orderkey#X] Input [3]: [c_custkey#X, o_orderkey#X, o_custkey#X] -(49) HashAggregate +(51) HashAggregate Input [2]: [c_custkey#X, o_orderkey#X] Keys [1]: [c_custkey#X] Functions [1]: [partial_count(o_orderkey#X)] Aggregate Attributes [1]: [count#X] Results [2]: [c_custkey#X, count#X] -(50) HashAggregate +(52) HashAggregate Input [2]: [c_custkey#X, count#X] Keys [1]: [c_custkey#X] Functions [1]: [count(o_orderkey#X)] Aggregate Attributes [1]: [count(o_orderkey#X)#X] Results [1]: [count(o_orderkey#X)#X AS c_count#X] -(51) HashAggregate +(53) HashAggregate Input [1]: [c_count#X] Keys [1]: [c_count#X] Functions [1]: [partial_count(1)] Aggregate Attributes [1]: [count#X] Results [2]: [c_count#X, count#X] -(52) Exchange +(54) Exchange Input [2]: [c_count#X, count#X] Arguments: hashpartitioning(c_count#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(53) HashAggregate +(55) HashAggregate Input [2]: [c_count#X, count#X] Keys [1]: [c_count#X] Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#X] Results [2]: [c_count#X, count(1)#X AS custdist#X] -(54) Exchange +(56) Exchange Input [2]: [c_count#X, custdist#X] Arguments: rangepartitioning(custdist#X DESC NULLS LAST, c_count#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(55) Sort +(57) Sort Input [2]: [c_count#X, custdist#X] Arguments: [custdist#X DESC NULLS LAST, c_count#X DESC NULLS LAST], true, 0 -(56) AdaptiveSparkPlan +(58) AdaptiveSparkPlan Output [2]: [c_count#X, custdist#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/14.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/14.txt index afac58cb52bc..a1108606b5bb 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/14.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/14.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (37) +AdaptiveSparkPlan (39) +- == Final Plan == VeloxColumnarToRowExec (25) +- ^ ProjectExecTransformer (23) @@ -22,17 +22,19 @@ AdaptiveSparkPlan (37) +- ^ FilterExecTransformer (11) +- ^ Scan parquet (10) +- == Initial Plan == - HashAggregate (36) - +- HashAggregate (35) - +- Project (34) - +- ShuffledHashJoin Inner BuildRight (33) - :- Exchange (29) - : +- Project (28) - : +- Filter (27) - : +- Scan parquet (26) - +- Exchange (32) - +- Filter (31) - +- Scan parquet (30) + HashAggregate (38) + +- HashAggregate (37) + +- Project (36) + +- SortMergeJoin Inner (35) + :- Sort (30) + : +- Exchange (29) + : +- Project (28) + : +- Filter (27) + : +- Scan parquet (26) + +- Sort (34) + +- Exchange (33) + +- Filter (32) + +- Scan parquet (31) (1) Scan parquet @@ -162,44 +164,52 @@ Input [4]: [l_partkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Input [3]: [l_partkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(30) Scan parquet +(30) Sort +Input [3]: [l_partkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(31) Scan parquet Output [2]: [p_partkey#X, p_type#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_partkey)] ReadSchema: struct -(31) Filter +(32) Filter Input [2]: [p_partkey#X, p_type#X] Condition : isnotnull(p_partkey#X) -(32) Exchange +(33) Exchange Input [2]: [p_partkey#X, p_type#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(33) ShuffledHashJoin +(34) Sort +Input [2]: [p_partkey#X, p_type#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(35) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(34) Project +(36) Project Output [3]: [l_extendedprice#X, l_discount#X, p_type#X] Input [5]: [l_partkey#X, l_extendedprice#X, l_discount#X, p_partkey#X, p_type#X] -(35) HashAggregate +(37) HashAggregate Input [3]: [l_extendedprice#X, l_discount#X, p_type#X] Keys: [] Functions [2]: [partial_sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)) ELSE 0.0000 END), partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] Results [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] -(36) HashAggregate +(38) HashAggregate Input [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] Keys: [] Functions [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)) ELSE 0.0000 END), sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)) ELSE 0.0000 END)#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] Results [1]: [CheckOverflow((promote_precision(CheckOverflow((100.0000 * promote_precision(sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)) ELSE 0.0000 END)#X)), DecimalType(38,6))) / promote_precision(cast(sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X as decimal(38,6)))), DecimalType(38,6)) AS promo_revenue#X] -(37) AdaptiveSparkPlan +(39) AdaptiveSparkPlan Output [1]: [promo_revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/15.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/15.txt index fec017400c11..88730deb3c32 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/15.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/15.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (45) +AdaptiveSparkPlan (47) +- == Final Plan == VeloxColumnarToRowExec (30) +- AQEShuffleRead (29) @@ -27,20 +27,22 @@ AdaptiveSparkPlan (45) +- ^ FilterExecTransformer (11) +- ^ Scan parquet (10) +- == Initial Plan == - Sort (44) - +- Exchange (43) - +- Project (42) - +- ShuffledHashJoin Inner BuildLeft (41) - :- Exchange (33) - : +- Filter (32) - : +- Scan parquet (31) - +- Filter (40) - +- HashAggregate (39) - +- Exchange (38) - +- HashAggregate (37) - +- Project (36) - +- Filter (35) - +- Scan parquet (34) + Sort (46) + +- Exchange (45) + +- Project (44) + +- SortMergeJoin Inner (43) + :- Sort (34) + : +- Exchange (33) + : +- Filter (32) + : +- Scan parquet (31) + +- Sort (42) + +- Filter (41) + +- HashAggregate (40) + +- Exchange (39) + +- HashAggregate (38) + +- Project (37) + +- Filter (36) + +- Scan parquet (35) (1) Scan parquet @@ -186,221 +188,229 @@ Condition : isnotnull(s_suppkey#X) Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(34) Scan parquet +(34) Sort +Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(35) Scan parquet Output [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1996-01-01), LessThan(l_shipdate,1996-04-01), IsNotNull(l_suppkey)] ReadSchema: struct -(35) Filter +(36) Filter Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : (((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1996-01-01)) AND (l_shipdate#X < 1996-04-01)) AND isnotnull(l_suppkey#X)) -(36) Project +(37) Project Output [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(37) HashAggregate +(38) HashAggregate Input [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] Keys [1]: [l_suppkey#X] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_suppkey#X, sum#X, isEmpty#X] -(38) Exchange +(39) Exchange Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(39) HashAggregate +(40) HashAggregate Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Keys [1]: [l_suppkey#X] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] Results [2]: [l_suppkey#X AS supplier_no#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X AS total_revenue#X] -(40) Filter +(41) Filter Input [2]: [supplier_no#X, total_revenue#X] Condition : (isnotnull(total_revenue#X) AND (total_revenue#X = Subquery subquery#X, [id=#X])) -(41) ShuffledHashJoin +(42) Sort +Input [2]: [supplier_no#X, total_revenue#X] +Arguments: [supplier_no#X ASC NULLS FIRST], false, 0 + +(43) SortMergeJoin Left keys [1]: [s_suppkey#X] Right keys [1]: [supplier_no#X] Join condition: None -(42) Project +(44) Project Output [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] Input [6]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, supplier_no#X, total_revenue#X] -(43) Exchange +(45) Exchange Input [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] Arguments: rangepartitioning(s_suppkey#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(44) Sort +(46) Sort Input [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] Arguments: [s_suppkey#X ASC NULLS FIRST], true, 0 -(45) AdaptiveSparkPlan +(47) AdaptiveSparkPlan Output [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] Arguments: isFinalPlan=true ===== Subqueries ===== Subquery:1 Hosting operator id = 22 Hosting Expression = Subquery subquery#X, [id=#X] -AdaptiveSparkPlan (71) +AdaptiveSparkPlan (73) +- == Final Plan == - VeloxColumnarToRowExec (62) - +- ^ RegularHashAggregateExecTransformer (60) - +- ^ RegularHashAggregateExecTransformer (59) - +- ^ ProjectExecTransformer (58) - +- ^ RegularHashAggregateExecTransformer (57) - +- ^ InputIteratorTransformer (56) - +- ShuffleQueryStage (54), Statistics(X) - +- ColumnarExchange (53) - +- VeloxAppendBatches (52) - +- ^ ProjectExecTransformer (50) - +- ^ FlushableHashAggregateExecTransformer (49) - +- ^ ProjectExecTransformer (48) - +- ^ FilterExecTransformer (47) - +- ^ Scan parquet (46) + VeloxColumnarToRowExec (64) + +- ^ RegularHashAggregateExecTransformer (62) + +- ^ RegularHashAggregateExecTransformer (61) + +- ^ ProjectExecTransformer (60) + +- ^ RegularHashAggregateExecTransformer (59) + +- ^ InputIteratorTransformer (58) + +- ShuffleQueryStage (56), Statistics(X) + +- ColumnarExchange (55) + +- VeloxAppendBatches (54) + +- ^ ProjectExecTransformer (52) + +- ^ FlushableHashAggregateExecTransformer (51) + +- ^ ProjectExecTransformer (50) + +- ^ FilterExecTransformer (49) + +- ^ Scan parquet (48) +- == Initial Plan == - HashAggregate (70) - +- HashAggregate (69) - +- HashAggregate (68) - +- Exchange (67) - +- HashAggregate (66) - +- Project (65) - +- Filter (64) - +- Scan parquet (63) + HashAggregate (72) + +- HashAggregate (71) + +- HashAggregate (70) + +- Exchange (69) + +- HashAggregate (68) + +- Project (67) + +- Filter (66) + +- Scan parquet (65) -(46) Scan parquet +(48) Scan parquet Output [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1996-01-01), LessThan(l_shipdate,1996-04-01)] ReadSchema: struct -(47) FilterExecTransformer +(49) FilterExecTransformer Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Arguments: ((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1996-01-01)) AND (l_shipdate#X < 1996-04-01)) -(48) ProjectExecTransformer +(50) ProjectExecTransformer Output [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)) AS _pre_X#X] Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(49) FlushableHashAggregateExecTransformer +(51) FlushableHashAggregateExecTransformer Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, _pre_X#X] Keys [1]: [l_suppkey#X] Functions [1]: [partial_sum(_pre_X#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_suppkey#X, sum#X, isEmpty#X] -(50) ProjectExecTransformer +(52) ProjectExecTransformer Output [4]: [hash(l_suppkey#X, 42) AS hash_partition_key#X, l_suppkey#X, sum#X, isEmpty#X] Input [3]: [l_suppkey#X, sum#X, isEmpty#X] -(51) WholeStageCodegenTransformer (X) +(53) WholeStageCodegenTransformer (X) Input [4]: [hash_partition_key#X, l_suppkey#X, sum#X, isEmpty#X] Arguments: false -(52) VeloxAppendBatches +(54) VeloxAppendBatches Input [4]: [hash_partition_key#X, l_suppkey#X, sum#X, isEmpty#X] Arguments: X -(53) ColumnarExchange +(55) ColumnarExchange Input [4]: [hash_partition_key#X, l_suppkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [l_suppkey#X, sum#X, isEmpty#X], [plan_id=X], [id=#X] -(54) ShuffleQueryStage +(56) ShuffleQueryStage Output [3]: [l_suppkey#X, sum#X, isEmpty#X] Arguments: X -(55) InputAdapter +(57) InputAdapter Input [3]: [l_suppkey#X, sum#X, isEmpty#X] -(56) InputIteratorTransformer +(58) InputIteratorTransformer Input [3]: [l_suppkey#X, sum#X, isEmpty#X] -(57) RegularHashAggregateExecTransformer +(59) RegularHashAggregateExecTransformer Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Keys [1]: [l_suppkey#X] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] Results [2]: [l_suppkey#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] -(58) ProjectExecTransformer +(60) ProjectExecTransformer Output [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X AS total_revenue#X] Input [2]: [l_suppkey#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] -(59) RegularHashAggregateExecTransformer +(61) RegularHashAggregateExecTransformer Input [1]: [total_revenue#X] Keys: [] Functions [1]: [partial_max(total_revenue#X)] Aggregate Attributes [1]: [max#X] Results [1]: [max#X] -(60) RegularHashAggregateExecTransformer +(62) RegularHashAggregateExecTransformer Input [1]: [max#X] Keys: [] Functions [1]: [max(total_revenue#X)] Aggregate Attributes [1]: [max(total_revenue#X)#X] Results [1]: [max(total_revenue#X)#X AS max(total_revenue)#X] -(61) WholeStageCodegenTransformer (X) +(63) WholeStageCodegenTransformer (X) Input [1]: [max(total_revenue)#X] Arguments: false -(62) VeloxColumnarToRowExec +(64) VeloxColumnarToRowExec Input [1]: [max(total_revenue)#X] -(63) Scan parquet +(65) Scan parquet Output [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1996-01-01), LessThan(l_shipdate,1996-04-01)] ReadSchema: struct -(64) Filter +(66) Filter Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : ((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1996-01-01)) AND (l_shipdate#X < 1996-04-01)) -(65) Project +(67) Project Output [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(66) HashAggregate +(68) HashAggregate Input [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] Keys [1]: [l_suppkey#X] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_suppkey#X, sum#X, isEmpty#X] -(67) Exchange +(69) Exchange Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(68) HashAggregate +(70) HashAggregate Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Keys [1]: [l_suppkey#X] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] Results [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X AS total_revenue#X] -(69) HashAggregate +(71) HashAggregate Input [1]: [total_revenue#X] Keys: [] Functions [1]: [partial_max(total_revenue#X)] Aggregate Attributes [1]: [max#X] Results [1]: [max#X] -(70) HashAggregate +(72) HashAggregate Input [1]: [max#X] Keys: [] Functions [1]: [max(total_revenue#X)] Aggregate Attributes [1]: [max(total_revenue#X)#X] Results [1]: [max(total_revenue#X)#X AS max(total_revenue)#X] -(71) AdaptiveSparkPlan +(73) AdaptiveSparkPlan Output [1]: [max(total_revenue)#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/16.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/16.txt index 15dd2fa6da8e..535b6940301d 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/16.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/16.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (69) +AdaptiveSparkPlan (71) +- == Final Plan == VeloxColumnarToRowExec (47) +- ^ SortExecTransformer (45) @@ -38,27 +38,29 @@ AdaptiveSparkPlan (69) +- ^ FilterExecTransformer (11) +- ^ Scan parquet (10) +- == Initial Plan == - Sort (68) - +- Exchange (67) - +- HashAggregate (66) - +- Exchange (65) - +- HashAggregate (64) - +- HashAggregate (63) - +- Exchange (62) - +- HashAggregate (61) - +- Project (60) - +- ShuffledHashJoin Inner BuildRight (59) - :- Exchange (55) - : +- BroadcastHashJoin LeftAnti BuildRight (54) - : :- Filter (49) - : : +- Scan parquet (48) - : +- BroadcastExchange (53) - : +- Project (52) - : +- Filter (51) - : +- Scan parquet (50) - +- Exchange (58) - +- Filter (57) - +- Scan parquet (56) + Sort (70) + +- Exchange (69) + +- HashAggregate (68) + +- Exchange (67) + +- HashAggregate (66) + +- HashAggregate (65) + +- Exchange (64) + +- HashAggregate (63) + +- Project (62) + +- SortMergeJoin Inner (61) + :- Sort (56) + : +- Exchange (55) + : +- BroadcastHashJoin LeftAnti BuildRight (54) + : :- Filter (49) + : : +- Scan parquet (48) + : +- BroadcastExchange (53) + : +- Project (52) + : +- Filter (51) + : +- Scan parquet (50) + +- Sort (60) + +- Exchange (59) + +- Filter (58) + +- Scan parquet (57) (1) Scan parquet @@ -296,74 +298,82 @@ Join condition: None Input [2]: [ps_partkey#X, ps_suppkey#X] Arguments: hashpartitioning(ps_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(56) Scan parquet +(56) Sort +Input [2]: [ps_partkey#X, ps_suppkey#X] +Arguments: [ps_partkey#X ASC NULLS FIRST], false, 0 + +(57) Scan parquet Output [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_brand), IsNotNull(p_type), Not(EqualTo(p_brand,Brand#X)), Not(StringStartsWith(p_type,MEDIUM POLISHED)), In(p_size, [14,19,23,3,36,45,49,9]), IsNotNull(p_partkey)] ReadSchema: struct -(57) Filter +(58) Filter Input [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] Condition : (((((isnotnull(p_brand#X) AND isnotnull(p_type#X)) AND NOT (p_brand#X = Brand#X)) AND NOT StartsWith(p_type#X, MEDIUM POLISHED)) AND p_size#X IN (49,14,23,45,19,3,36,9)) AND isnotnull(p_partkey#X)) -(58) Exchange +(59) Exchange Input [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(59) ShuffledHashJoin +(60) Sort +Input [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(61) SortMergeJoin Left keys [1]: [ps_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(60) Project +(62) Project Output [4]: [ps_suppkey#X, p_brand#X, p_type#X, p_size#X] Input [6]: [ps_partkey#X, ps_suppkey#X, p_partkey#X, p_brand#X, p_type#X, p_size#X] -(61) HashAggregate +(63) HashAggregate Input [4]: [ps_suppkey#X, p_brand#X, p_type#X, p_size#X] Keys [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] Functions: [] Aggregate Attributes: [] Results [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] -(62) Exchange +(64) Exchange Input [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] Arguments: hashpartitioning(p_brand#X, p_type#X, p_size#X, ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(63) HashAggregate +(65) HashAggregate Input [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] Keys [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] Functions: [] Aggregate Attributes: [] Results [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] -(64) HashAggregate +(66) HashAggregate Input [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] Keys [3]: [p_brand#X, p_type#X, p_size#X] Functions [1]: [partial_count(distinct ps_suppkey#X)] Aggregate Attributes [1]: [count(ps_suppkey#X)#X] Results [4]: [p_brand#X, p_type#X, p_size#X, count#X] -(65) Exchange +(67) Exchange Input [4]: [p_brand#X, p_type#X, p_size#X, count#X] Arguments: hashpartitioning(p_brand#X, p_type#X, p_size#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(66) HashAggregate +(68) HashAggregate Input [4]: [p_brand#X, p_type#X, p_size#X, count#X] Keys [3]: [p_brand#X, p_type#X, p_size#X] Functions [1]: [count(distinct ps_suppkey#X)] Aggregate Attributes [1]: [count(ps_suppkey#X)#X] Results [4]: [p_brand#X, p_type#X, p_size#X, count(ps_suppkey#X)#X AS supplier_cnt#X] -(67) Exchange +(69) Exchange Input [4]: [p_brand#X, p_type#X, p_size#X, supplier_cnt#X] Arguments: rangepartitioning(supplier_cnt#X DESC NULLS LAST, p_brand#X ASC NULLS FIRST, p_type#X ASC NULLS FIRST, p_size#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(68) Sort +(70) Sort Input [4]: [p_brand#X, p_type#X, p_size#X, supplier_cnt#X] Arguments: [supplier_cnt#X DESC NULLS LAST, p_brand#X ASC NULLS FIRST, p_type#X ASC NULLS FIRST, p_size#X ASC NULLS FIRST], true, 0 -(69) AdaptiveSparkPlan +(71) AdaptiveSparkPlan Output [4]: [p_brand#X, p_type#X, p_size#X, supplier_cnt#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/17.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/17.txt index 69af0fd38e92..d360b6c948e3 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/17.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/17.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (60) +AdaptiveSparkPlan (63) +- == Final Plan == VeloxColumnarToRowExec (40) +- ^ ProjectExecTransformer (38) @@ -35,25 +35,28 @@ AdaptiveSparkPlan (60) +- ^ FilterExecTransformer (22) +- ^ Scan parquet (21) +- == Initial Plan == - HashAggregate (59) - +- HashAggregate (58) - +- Project (57) - +- ShuffledHashJoin Inner BuildRight (56) - :- Project (49) - : +- ShuffledHashJoin Inner BuildRight (48) - : :- Exchange (43) - : : +- Filter (42) - : : +- Scan parquet (41) - : +- Exchange (47) - : +- Project (46) - : +- Filter (45) - : +- Scan parquet (44) - +- Filter (55) - +- HashAggregate (54) - +- Exchange (53) - +- HashAggregate (52) - +- Filter (51) - +- Scan parquet (50) + HashAggregate (62) + +- HashAggregate (61) + +- Project (60) + +- SortMergeJoin Inner (59) + :- Project (51) + : +- SortMergeJoin Inner (50) + : :- Sort (44) + : : +- Exchange (43) + : : +- Filter (42) + : : +- Scan parquet (41) + : +- Sort (49) + : +- Exchange (48) + : +- Project (47) + : +- Filter (46) + : +- Scan parquet (45) + +- Sort (58) + +- Filter (57) + +- HashAggregate (56) + +- Exchange (55) + +- HashAggregate (54) + +- Filter (53) + +- Scan parquet (52) (1) Scan parquet @@ -247,90 +250,102 @@ Condition : (isnotnull(l_partkey#X) AND isnotnull(l_quantity#X)) Input [3]: [l_partkey#X, l_quantity#X, l_extendedprice#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(44) Scan parquet +(44) Sort +Input [3]: [l_partkey#X, l_quantity#X, l_extendedprice#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(45) Scan parquet Output [3]: [p_partkey#X, p_brand#X, p_container#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_brand), IsNotNull(p_container), EqualTo(p_brand,Brand#X), EqualTo(p_container,MED BOX), IsNotNull(p_partkey)] ReadSchema: struct -(45) Filter +(46) Filter Input [3]: [p_partkey#X, p_brand#X, p_container#X] Condition : ((((isnotnull(p_brand#X) AND isnotnull(p_container#X)) AND (p_brand#X = Brand#X)) AND (p_container#X = MED BOX)) AND isnotnull(p_partkey#X)) -(46) Project +(47) Project Output [1]: [p_partkey#X] Input [3]: [p_partkey#X, p_brand#X, p_container#X] -(47) Exchange +(48) Exchange Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(48) ShuffledHashJoin +(49) Sort +Input [1]: [p_partkey#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(50) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(49) Project +(51) Project Output [3]: [l_quantity#X, l_extendedprice#X, p_partkey#X] Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, p_partkey#X] -(50) Scan parquet +(52) Scan parquet Output [2]: [l_partkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_partkey)] ReadSchema: struct -(51) Filter +(53) Filter Input [2]: [l_partkey#X, l_quantity#X] Condition : isnotnull(l_partkey#X) -(52) HashAggregate +(54) HashAggregate Input [2]: [l_partkey#X, l_quantity#X] Keys [1]: [l_partkey#X] Functions [1]: [partial_avg(l_quantity#X)] Aggregate Attributes [2]: [sum#X, count#X] Results [3]: [l_partkey#X, sum#X, count#X] -(53) Exchange +(55) Exchange Input [3]: [l_partkey#X, sum#X, count#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(54) HashAggregate +(56) HashAggregate Input [3]: [l_partkey#X, sum#X, count#X] Keys [1]: [l_partkey#X] Functions [1]: [avg(l_quantity#X)] Aggregate Attributes [1]: [avg(l_quantity#X)#X] Results [2]: [CheckOverflow((0.200000 * promote_precision(avg(l_quantity#X)#X)), DecimalType(18,7)) AS (0.2 * avg(l_quantity))#X, l_partkey#X] -(55) Filter +(57) Filter Input [2]: [(0.2 * avg(l_quantity))#X, l_partkey#X] Condition : isnotnull((0.2 * avg(l_quantity))#X) -(56) ShuffledHashJoin +(58) Sort +Input [2]: [(0.2 * avg(l_quantity))#X, l_partkey#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(59) SortMergeJoin Left keys [1]: [p_partkey#X] Right keys [1]: [l_partkey#X] Join condition: (cast(l_quantity#X as decimal(18,7)) < (0.2 * avg(l_quantity))#X) -(57) Project +(60) Project Output [1]: [l_extendedprice#X] Input [5]: [l_quantity#X, l_extendedprice#X, p_partkey#X, (0.2 * avg(l_quantity))#X, l_partkey#X] -(58) HashAggregate +(61) HashAggregate Input [1]: [l_extendedprice#X] Keys: [] Functions [1]: [partial_sum(l_extendedprice#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [2]: [sum#X, isEmpty#X] -(59) HashAggregate +(62) HashAggregate Input [2]: [sum#X, isEmpty#X] Keys: [] Functions [1]: [sum(l_extendedprice#X)] Aggregate Attributes [1]: [sum(l_extendedprice#X)#X] Results [1]: [CheckOverflow((promote_precision(sum(l_extendedprice#X)#X) / 7.00), DecimalType(27,6)) AS avg_yearly#X] -(60) AdaptiveSparkPlan +(63) AdaptiveSparkPlan Output [1]: [avg_yearly#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/18.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/18.txt index 8db05ed7572c..a664adfd3175 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/18.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/18.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (103) +AdaptiveSparkPlan (110) +- == Final Plan == VeloxColumnarToRowExec (70) +- TakeOrderedAndProjectExecTransformer (69) @@ -58,38 +58,45 @@ AdaptiveSparkPlan (103) +- ShuffleQueryStage (57), Statistics(X) +- ReusedExchange (56) +- == Initial Plan == - TakeOrderedAndProject (102) - +- HashAggregate (101) - +- HashAggregate (100) - +- Project (99) - +- ShuffledHashJoin Inner BuildRight (98) - :- Exchange (87) - : +- Project (86) - : +- ShuffledHashJoin Inner BuildLeft (85) - : :- Exchange (73) - : : +- Filter (72) - : : +- Scan parquet (71) - : +- Exchange (84) - : +- ShuffledHashJoin LeftSemi BuildRight (83) - : :- Exchange (76) - : : +- Filter (75) - : : +- Scan parquet (74) - : +- Project (82) - : +- Filter (81) - : +- HashAggregate (80) - : +- Exchange (79) - : +- HashAggregate (78) - : +- Scan parquet (77) - +- ShuffledHashJoin LeftSemi BuildRight (97) - :- Exchange (90) - : +- Filter (89) - : +- Scan parquet (88) - +- Project (96) - +- Filter (95) - +- HashAggregate (94) - +- Exchange (93) - +- HashAggregate (92) - +- Scan parquet (91) + TakeOrderedAndProject (109) + +- HashAggregate (108) + +- HashAggregate (107) + +- Project (106) + +- SortMergeJoin Inner (105) + :- Sort (92) + : +- Exchange (91) + : +- Project (90) + : +- SortMergeJoin Inner (89) + : :- Sort (74) + : : +- Exchange (73) + : : +- Filter (72) + : : +- Scan parquet (71) + : +- Sort (88) + : +- Exchange (87) + : +- SortMergeJoin LeftSemi (86) + : :- Sort (78) + : : +- Exchange (77) + : : +- Filter (76) + : : +- Scan parquet (75) + : +- Sort (85) + : +- Project (84) + : +- Filter (83) + : +- HashAggregate (82) + : +- Exchange (81) + : +- HashAggregate (80) + : +- Scan parquet (79) + +- SortMergeJoin LeftSemi (104) + :- Sort (96) + : +- Exchange (95) + : +- Filter (94) + : +- Scan parquet (93) + +- Sort (103) + +- Project (102) + +- Filter (101) + +- HashAggregate (100) + +- Exchange (99) + +- HashAggregate (98) + +- Scan parquet (97) (1) Scan parquet @@ -401,154 +408,182 @@ Condition : isnotnull(c_custkey#X) Input [2]: [c_custkey#X, c_name#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(74) Scan parquet +(74) Sort +Input [2]: [c_custkey#X, c_name#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(75) Scan parquet Output [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_custkey), IsNotNull(o_orderkey)] ReadSchema: struct -(75) Filter +(76) Filter Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Condition : (isnotnull(o_custkey#X) AND isnotnull(o_orderkey#X)) -(76) Exchange +(77) Exchange Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(77) Scan parquet +(78) Sort +Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(79) Scan parquet Output [2]: [l_orderkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(78) HashAggregate +(80) HashAggregate Input [2]: [l_orderkey#X, l_quantity#X] Keys [1]: [l_orderkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_orderkey#X, sum#X, isEmpty#X] -(79) Exchange +(81) Exchange Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(80) HashAggregate +(82) HashAggregate Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Keys [1]: [l_orderkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [2]: [l_orderkey#X, sum(l_quantity#X)#X AS sum(l_quantity#X)#X] -(81) Filter +(83) Filter Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] Condition : (isnotnull(sum(l_quantity#X)#X) AND (sum(l_quantity#X)#X > 300.00)) -(82) Project +(84) Project Output [1]: [l_orderkey#X] Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] -(83) ShuffledHashJoin +(85) Sort +Input [1]: [l_orderkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(86) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(84) Exchange +(87) Exchange Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(85) ShuffledHashJoin +(88) Sort +Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(89) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join condition: None -(86) Project +(90) Project Output [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] Input [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] -(87) Exchange +(91) Exchange Input [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(88) Scan parquet +(92) Sort +Input [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(93) Scan parquet Output [2]: [l_orderkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_orderkey)] ReadSchema: struct -(89) Filter +(94) Filter Input [2]: [l_orderkey#X, l_quantity#X] Condition : isnotnull(l_orderkey#X) -(90) Exchange +(95) Exchange Input [2]: [l_orderkey#X, l_quantity#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(91) Scan parquet +(96) Sort +Input [2]: [l_orderkey#X, l_quantity#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(97) Scan parquet Output [2]: [l_orderkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(92) HashAggregate +(98) HashAggregate Input [2]: [l_orderkey#X, l_quantity#X] Keys [1]: [l_orderkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_orderkey#X, sum#X, isEmpty#X] -(93) Exchange +(99) Exchange Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(94) HashAggregate +(100) HashAggregate Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Keys [1]: [l_orderkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [2]: [l_orderkey#X, sum(l_quantity#X)#X AS sum(l_quantity#X)#X] -(95) Filter +(101) Filter Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] Condition : (isnotnull(sum(l_quantity#X)#X) AND (sum(l_quantity#X)#X > 300.00)) -(96) Project +(102) Project Output [1]: [l_orderkey#X] Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] -(97) ShuffledHashJoin +(103) Sort +Input [1]: [l_orderkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(104) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(98) ShuffledHashJoin +(105) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(99) Project +(106) Project Output [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_quantity#X] Input [7]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_orderkey#X, l_quantity#X] -(100) HashAggregate +(107) HashAggregate Input [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_quantity#X] Keys [5]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [7]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] -(101) HashAggregate +(108) HashAggregate Input [7]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] Keys [5]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity#X)#X AS sum(l_quantity)#X] -(102) TakeOrderedAndProject +(109) TakeOrderedAndProject Input [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] Arguments: X, [o_totalprice#X DESC NULLS LAST, o_orderdate#X ASC NULLS FIRST], [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] -(103) AdaptiveSparkPlan +(110) AdaptiveSparkPlan Output [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/19.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/19.txt index 14a5515d1e79..58e80362020f 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/19.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/19.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (36) +AdaptiveSparkPlan (38) +- == Final Plan == VeloxColumnarToRowExec (24) +- ^ RegularHashAggregateExecTransformer (22) @@ -21,17 +21,19 @@ AdaptiveSparkPlan (36) +- ^ FilterExecTransformer (11) +- ^ Scan parquet (10) +- == Initial Plan == - HashAggregate (35) - +- HashAggregate (34) - +- Project (33) - +- ShuffledHashJoin Inner BuildRight (32) - :- Exchange (28) - : +- Project (27) - : +- Filter (26) - : +- Scan parquet (25) - +- Exchange (31) - +- Filter (30) - +- Scan parquet (29) + HashAggregate (37) + +- HashAggregate (36) + +- Project (35) + +- SortMergeJoin Inner (34) + :- Sort (29) + : +- Exchange (28) + : +- Project (27) + : +- Filter (26) + : +- Scan parquet (25) + +- Sort (33) + +- Exchange (32) + +- Filter (31) + +- Scan parquet (30) (1) Scan parquet @@ -157,44 +159,52 @@ Input [6]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, l_shipin Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(29) Scan parquet +(29) Sort +Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(30) Scan parquet Output [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_size), GreaterThanOrEqual(p_size,1), IsNotNull(p_partkey), Or(Or(And(And(EqualTo(p_brand,Brand#X),In(p_container, [SM BOX,SM CASE,SM PACK,SM PKG])),LessThanOrEqual(p_size,5)),And(And(EqualTo(p_brand,Brand#X),In(p_container, [MED BAG,MED BOX,MED PACK,MED PKG])),LessThanOrEqual(p_size,10))),And(And(EqualTo(p_brand,Brand#X),In(p_container, [LG BOX,LG CASE,LG PACK,LG PKG])),LessThanOrEqual(p_size,15)))] ReadSchema: struct -(30) Filter +(31) Filter Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Condition : (((isnotnull(p_size#X) AND (p_size#X >= 1)) AND isnotnull(p_partkey#X)) AND (((((p_brand#X = Brand#X) AND p_container#X IN (SM CASE,SM BOX,SM PACK,SM PKG)) AND (p_size#X <= 5)) OR (((p_brand#X = Brand#X) AND p_container#X IN (MED BAG,MED BOX,MED PKG,MED PACK)) AND (p_size#X <= 10))) OR (((p_brand#X = Brand#X) AND p_container#X IN (LG CASE,LG BOX,LG PACK,LG PKG)) AND (p_size#X <= 15)))) -(31) Exchange +(32) Exchange Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(32) ShuffledHashJoin +(33) Sort +Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(34) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join condition: (((((((p_brand#X = Brand#X) AND p_container#X IN (SM CASE,SM BOX,SM PACK,SM PKG)) AND (l_quantity#X >= 1.00)) AND (l_quantity#X <= 11.00)) AND (p_size#X <= 5)) OR (((((p_brand#X = Brand#X) AND p_container#X IN (MED BAG,MED BOX,MED PKG,MED PACK)) AND (l_quantity#X >= 10.00)) AND (l_quantity#X <= 20.00)) AND (p_size#X <= 10))) OR (((((p_brand#X = Brand#X) AND p_container#X IN (LG CASE,LG BOX,LG PACK,LG PKG)) AND (l_quantity#X >= 20.00)) AND (l_quantity#X <= 30.00)) AND (p_size#X <= 15))) -(33) Project +(35) Project Output [2]: [l_extendedprice#X, l_discount#X] Input [8]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, p_partkey#X, p_brand#X, p_size#X, p_container#X] -(34) HashAggregate +(36) HashAggregate Input [2]: [l_extendedprice#X, l_discount#X] Keys: [] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [2]: [sum#X, isEmpty#X] -(35) HashAggregate +(37) HashAggregate Input [2]: [sum#X, isEmpty#X] Keys: [] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] Results [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X AS revenue#X] -(36) AdaptiveSparkPlan +(38) AdaptiveSparkPlan Output [1]: [revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/20.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/20.txt index 7b840720bc90..c22b822e6f7d 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/20.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/20.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (133) +AdaptiveSparkPlan (143) +- == Final Plan == VeloxColumnarToRowExec (93) +- AQEShuffleRead (92) @@ -75,45 +75,55 @@ AdaptiveSparkPlan (133) +- ^ FilterExecTransformer (78) +- ^ Scan parquet (77) +- == Initial Plan == - Sort (132) - +- Exchange (131) - +- Project (130) - +- ShuffledHashJoin Inner BuildRight (129) - :- Exchange (124) - : +- Project (123) - : +- ShuffledHashJoin LeftSemi BuildRight (122) - : :- Exchange (96) - : : +- Filter (95) - : : +- Scan parquet (94) - : +- Exchange (121) - : +- Project (120) - : +- ShuffledHashJoin Inner BuildLeft (119) - : :- Exchange (105) - : : +- ShuffledHashJoin LeftSemi BuildRight (104) - : : :- Exchange (99) - : : : +- Filter (98) - : : : +- Scan parquet (97) - : : +- Exchange (103) - : : +- Project (102) - : : +- Filter (101) - : : +- Scan parquet (100) - : +- Exchange (118) - : +- Filter (117) - : +- HashAggregate (116) - : +- HashAggregate (115) - : +- ShuffledHashJoin LeftSemi BuildRight (114) - : :- Exchange (109) - : : +- Project (108) - : : +- Filter (107) - : : +- Scan parquet (106) - : +- Exchange (113) - : +- Project (112) - : +- Filter (111) - : +- Scan parquet (110) - +- Exchange (128) - +- Project (127) - +- Filter (126) - +- Scan parquet (125) + Sort (142) + +- Exchange (141) + +- Project (140) + +- SortMergeJoin Inner (139) + :- Sort (133) + : +- Exchange (132) + : +- Project (131) + : +- SortMergeJoin LeftSemi (130) + : :- Sort (97) + : : +- Exchange (96) + : : +- Filter (95) + : : +- Scan parquet (94) + : +- Sort (129) + : +- Exchange (128) + : +- Project (127) + : +- SortMergeJoin Inner (126) + : :- Sort (109) + : : +- Exchange (108) + : : +- SortMergeJoin LeftSemi (107) + : : :- Sort (101) + : : : +- Exchange (100) + : : : +- Filter (99) + : : : +- Scan parquet (98) + : : +- Sort (106) + : : +- Exchange (105) + : : +- Project (104) + : : +- Filter (103) + : : +- Scan parquet (102) + : +- Sort (125) + : +- Exchange (124) + : +- Filter (123) + : +- HashAggregate (122) + : +- HashAggregate (121) + : +- SortMergeJoin LeftSemi (120) + : :- Sort (114) + : : +- Exchange (113) + : : +- Project (112) + : : +- Filter (111) + : : +- Scan parquet (110) + : +- Sort (119) + : +- Exchange (118) + : +- Project (117) + : +- Filter (116) + : +- Scan parquet (115) + +- Sort (138) + +- Exchange (137) + +- Project (136) + +- Filter (135) + +- Scan parquet (134) (1) Scan parquet @@ -507,176 +517,216 @@ Condition : isnotnull(s_nationkey#X) Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(97) Scan parquet +(97) Sort +Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(98) Scan parquet Output [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(ps_availqty), IsNotNull(ps_partkey), IsNotNull(ps_suppkey)] ReadSchema: struct -(98) Filter +(99) Filter Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Condition : ((isnotnull(ps_availqty#X) AND isnotnull(ps_partkey#X)) AND isnotnull(ps_suppkey#X)) -(99) Exchange +(100) Exchange Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Arguments: hashpartitioning(ps_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(100) Scan parquet +(101) Sort +Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] +Arguments: [ps_partkey#X ASC NULLS FIRST], false, 0 + +(102) Scan parquet Output [2]: [p_partkey#X, p_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_name), StringStartsWith(p_name,forest)] ReadSchema: struct -(101) Filter +(103) Filter Input [2]: [p_partkey#X, p_name#X] Condition : (isnotnull(p_name#X) AND StartsWith(p_name#X, forest)) -(102) Project +(104) Project Output [1]: [p_partkey#X] Input [2]: [p_partkey#X, p_name#X] -(103) Exchange +(105) Exchange Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(104) ShuffledHashJoin +(106) Sort +Input [1]: [p_partkey#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(107) SortMergeJoin Left keys [1]: [ps_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(105) Exchange +(108) Exchange Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Arguments: hashpartitioning(ps_partkey#X, ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(106) Scan parquet +(109) Sort +Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] +Arguments: [ps_partkey#X ASC NULLS FIRST, ps_suppkey#X ASC NULLS FIRST], false, 0 + +(110) Scan parquet Output [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1994-01-01), LessThan(l_shipdate,1995-01-01), IsNotNull(l_partkey), IsNotNull(l_suppkey)] ReadSchema: struct -(107) Filter +(111) Filter Input [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] Condition : ((((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1994-01-01)) AND (l_shipdate#X < 1995-01-01)) AND isnotnull(l_partkey#X)) AND isnotnull(l_suppkey#X)) -(108) Project +(112) Project Output [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Input [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] -(109) Exchange +(113) Exchange Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(110) Scan parquet +(114) Sort +Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(115) Scan parquet Output [2]: [p_partkey#X, p_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_name), StringStartsWith(p_name,forest)] ReadSchema: struct -(111) Filter +(116) Filter Input [2]: [p_partkey#X, p_name#X] Condition : (isnotnull(p_name#X) AND StartsWith(p_name#X, forest)) -(112) Project +(117) Project Output [1]: [p_partkey#X] Input [2]: [p_partkey#X, p_name#X] -(113) Exchange +(118) Exchange Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(114) ShuffledHashJoin +(119) Sort +Input [1]: [p_partkey#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(120) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join condition: None -(115) HashAggregate +(121) HashAggregate Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Keys [2]: [l_partkey#X, l_suppkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] -(116) HashAggregate +(122) HashAggregate Input [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] Keys [2]: [l_partkey#X, l_suppkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [3]: [CheckOverflow((0.50 * promote_precision(sum(l_quantity#X)#X)), DecimalType(24,3)) AS (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(117) Filter +(123) Filter Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Condition : isnotnull((0.5 * sum(l_quantity))#X) -(118) Exchange +(124) Exchange Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Arguments: hashpartitioning(l_partkey#X, l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(119) ShuffledHashJoin +(125) Sort +Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] +Arguments: [l_partkey#X ASC NULLS FIRST, l_suppkey#X ASC NULLS FIRST], false, 0 + +(126) SortMergeJoin Left keys [2]: [ps_partkey#X, ps_suppkey#X] Right keys [2]: [l_partkey#X, l_suppkey#X] Join condition: (cast(ps_availqty#X as decimal(24,3)) > (0.5 * sum(l_quantity))#X) -(120) Project +(127) Project Output [1]: [ps_suppkey#X] Input [6]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(121) Exchange +(128) Exchange Input [1]: [ps_suppkey#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(122) ShuffledHashJoin +(129) Sort +Input [1]: [ps_suppkey#X] +Arguments: [ps_suppkey#X ASC NULLS FIRST], false, 0 + +(130) SortMergeJoin Left keys [1]: [s_suppkey#X] Right keys [1]: [ps_suppkey#X] Join condition: None -(123) Project +(131) Project Output [3]: [s_name#X, s_address#X, s_nationkey#X] Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] -(124) Exchange +(132) Exchange Input [3]: [s_name#X, s_address#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(125) Scan parquet +(133) Sort +Input [3]: [s_name#X, s_address#X, s_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(134) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,CANADA), IsNotNull(n_nationkey)] ReadSchema: struct -(126) Filter +(135) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = CANADA)) AND isnotnull(n_nationkey#X)) -(127) Project +(136) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(128) Exchange +(137) Exchange Input [1]: [n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(129) ShuffledHashJoin +(138) Sort +Input [1]: [n_nationkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(139) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(130) Project +(140) Project Output [2]: [s_name#X, s_address#X] Input [4]: [s_name#X, s_address#X, s_nationkey#X, n_nationkey#X] -(131) Exchange +(141) Exchange Input [2]: [s_name#X, s_address#X] Arguments: rangepartitioning(s_name#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(132) Sort +(142) Sort Input [2]: [s_name#X, s_address#X] Arguments: [s_name#X ASC NULLS FIRST], true, 0 -(133) AdaptiveSparkPlan +(143) AdaptiveSparkPlan Output [2]: [s_name#X, s_address#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/21.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/21.txt index 5c05ec24757e..8413e2f8f232 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/21.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/21.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (128) +AdaptiveSparkPlan (137) +- == Final Plan == VeloxColumnarToRowExec (91) +- ^ RegularHashAggregateExecTransformer (89) @@ -72,42 +72,51 @@ AdaptiveSparkPlan (128) +- ^ FilterExecTransformer (71) +- ^ Scan parquet (70) +- == Initial Plan == - TakeOrderedAndProject (127) - +- HashAggregate (126) - +- Exchange (125) - +- HashAggregate (124) - +- Project (123) - +- ShuffledHashJoin Inner BuildRight (122) - :- Exchange (117) - : +- Project (116) - : +- ShuffledHashJoin Inner BuildRight (115) - : :- Exchange (110) - : : +- Project (109) - : : +- ShuffledHashJoin Inner BuildLeft (108) - : : :- Exchange (94) - : : : +- Filter (93) - : : : +- Scan parquet (92) - : : +- Exchange (107) - : : +- ShuffledHashJoin LeftAnti BuildRight (106) - : : :- ShuffledHashJoin LeftSemi BuildRight (101) - : : : :- Exchange (98) - : : : : +- Project (97) - : : : : +- Filter (96) - : : : : +- Scan parquet (95) - : : : +- Exchange (100) - : : : +- Scan parquet (99) - : : +- Exchange (105) - : : +- Project (104) - : : +- Filter (103) - : : +- Scan parquet (102) - : +- Exchange (114) - : +- Project (113) - : +- Filter (112) - : +- Scan parquet (111) - +- Exchange (121) - +- Project (120) - +- Filter (119) - +- Scan parquet (118) + TakeOrderedAndProject (136) + +- HashAggregate (135) + +- Exchange (134) + +- HashAggregate (133) + +- Project (132) + +- SortMergeJoin Inner (131) + :- Sort (125) + : +- Exchange (124) + : +- Project (123) + : +- SortMergeJoin Inner (122) + : :- Sort (116) + : : +- Exchange (115) + : : +- Project (114) + : : +- SortMergeJoin Inner (113) + : : :- Sort (95) + : : : +- Exchange (94) + : : : +- Filter (93) + : : : +- Scan parquet (92) + : : +- Sort (112) + : : +- Exchange (111) + : : +- SortMergeJoin LeftAnti (110) + : : :- SortMergeJoin LeftSemi (104) + : : : :- Sort (100) + : : : : +- Exchange (99) + : : : : +- Project (98) + : : : : +- Filter (97) + : : : : +- Scan parquet (96) + : : : +- Sort (103) + : : : +- Exchange (102) + : : : +- Scan parquet (101) + : : +- Sort (109) + : : +- Exchange (108) + : : +- Project (107) + : : +- Filter (106) + : : +- Scan parquet (105) + : +- Sort (121) + : +- Exchange (120) + : +- Project (119) + : +- Filter (118) + : +- Scan parquet (117) + +- Sort (130) + +- Exchange (129) + +- Project (128) + +- Filter (127) + +- Scan parquet (126) (1) Scan parquet @@ -496,163 +505,199 @@ Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) Input [3]: [s_suppkey#X, s_name#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(95) Scan parquet +(95) Sort +Input [3]: [s_suppkey#X, s_name#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(96) Scan parquet Output [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_receiptdate), IsNotNull(l_commitdate), IsNotNull(l_suppkey), IsNotNull(l_orderkey)] ReadSchema: struct -(96) Filter +(97) Filter Input [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] Condition : ((((isnotnull(l_receiptdate#X) AND isnotnull(l_commitdate#X)) AND (l_receiptdate#X > l_commitdate#X)) AND isnotnull(l_suppkey#X)) AND isnotnull(l_orderkey#X)) -(97) Project +(98) Project Output [2]: [l_orderkey#X, l_suppkey#X] Input [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] -(98) Exchange +(99) Exchange Input [2]: [l_orderkey#X, l_suppkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(99) Scan parquet +(100) Sort +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(101) Scan parquet Output [2]: [l_orderkey#X, l_suppkey#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(100) Exchange +(102) Exchange Input [2]: [l_orderkey#X, l_suppkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(101) ShuffledHashJoin +(103) Sort +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(104) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: NOT (l_suppkey#X = l_suppkey#X) -(102) Scan parquet +(105) Scan parquet Output [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_receiptdate), IsNotNull(l_commitdate)] ReadSchema: struct -(103) Filter +(106) Filter Input [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] Condition : ((isnotnull(l_receiptdate#X) AND isnotnull(l_commitdate#X)) AND (l_receiptdate#X > l_commitdate#X)) -(104) Project +(107) Project Output [2]: [l_orderkey#X, l_suppkey#X] Input [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] -(105) Exchange +(108) Exchange Input [2]: [l_orderkey#X, l_suppkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(106) ShuffledHashJoin +(109) Sort +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(110) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: NOT (l_suppkey#X = l_suppkey#X) -(107) Exchange +(111) Exchange Input [2]: [l_orderkey#X, l_suppkey#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(108) ShuffledHashJoin +(112) Sort +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: [l_suppkey#X ASC NULLS FIRST], false, 0 + +(113) SortMergeJoin Left keys [1]: [s_suppkey#X] Right keys [1]: [l_suppkey#X] Join condition: None -(109) Project +(114) Project Output [3]: [s_name#X, s_nationkey#X, l_orderkey#X] Input [5]: [s_suppkey#X, s_name#X, s_nationkey#X, l_orderkey#X, l_suppkey#X] -(110) Exchange +(115) Exchange Input [3]: [s_name#X, s_nationkey#X, l_orderkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(111) Scan parquet +(116) Sort +Input [3]: [s_name#X, s_nationkey#X, l_orderkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(117) Scan parquet Output [2]: [o_orderkey#X, o_orderstatus#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderstatus), EqualTo(o_orderstatus,F), IsNotNull(o_orderkey)] ReadSchema: struct -(112) Filter +(118) Filter Input [2]: [o_orderkey#X, o_orderstatus#X] Condition : ((isnotnull(o_orderstatus#X) AND (o_orderstatus#X = F)) AND isnotnull(o_orderkey#X)) -(113) Project +(119) Project Output [1]: [o_orderkey#X] Input [2]: [o_orderkey#X, o_orderstatus#X] -(114) Exchange +(120) Exchange Input [1]: [o_orderkey#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(115) ShuffledHashJoin +(121) Sort +Input [1]: [o_orderkey#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(122) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [o_orderkey#X] Join condition: None -(116) Project +(123) Project Output [2]: [s_name#X, s_nationkey#X] Input [4]: [s_name#X, s_nationkey#X, l_orderkey#X, o_orderkey#X] -(117) Exchange +(124) Exchange Input [2]: [s_name#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(118) Scan parquet +(125) Sort +Input [2]: [s_name#X, s_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(126) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,SAUDI ARABIA), IsNotNull(n_nationkey)] ReadSchema: struct -(119) Filter +(127) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = SAUDI ARABIA)) AND isnotnull(n_nationkey#X)) -(120) Project +(128) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(121) Exchange +(129) Exchange Input [1]: [n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(122) ShuffledHashJoin +(130) Sort +Input [1]: [n_nationkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(131) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(123) Project +(132) Project Output [1]: [s_name#X] Input [3]: [s_name#X, s_nationkey#X, n_nationkey#X] -(124) HashAggregate +(133) HashAggregate Input [1]: [s_name#X] Keys [1]: [s_name#X] Functions [1]: [partial_count(1)] Aggregate Attributes [1]: [count#X] Results [2]: [s_name#X, count#X] -(125) Exchange +(134) Exchange Input [2]: [s_name#X, count#X] Arguments: hashpartitioning(s_name#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(126) HashAggregate +(135) HashAggregate Input [2]: [s_name#X, count#X] Keys [1]: [s_name#X] Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#X] Results [2]: [s_name#X, count(1)#X AS numwait#X] -(127) TakeOrderedAndProject +(136) TakeOrderedAndProject Input [2]: [s_name#X, numwait#X] Arguments: X, [numwait#X DESC NULLS LAST, s_name#X ASC NULLS FIRST], [s_name#X, numwait#X] -(128) AdaptiveSparkPlan +(137) AdaptiveSparkPlan Output [2]: [s_name#X, numwait#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/22.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/22.txt index af5c086c274a..214b34066a8f 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/22.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/22.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (50) +AdaptiveSparkPlan (52) +- == Final Plan == VeloxColumnarToRowExec (37) +- ^ SortExecTransformer (35) @@ -30,18 +30,20 @@ AdaptiveSparkPlan (50) +- ^ ProjectExecTransformer (11) +- ^ Scan parquet (10) +- == Initial Plan == - Sort (49) - +- Exchange (48) - +- HashAggregate (47) - +- Exchange (46) - +- HashAggregate (45) - +- Project (44) - +- ShuffledHashJoin LeftAnti BuildRight (43) - :- Exchange (40) - : +- Filter (39) - : +- Scan parquet (38) - +- Exchange (42) - +- Scan parquet (41) + Sort (51) + +- Exchange (50) + +- HashAggregate (49) + +- Exchange (48) + +- HashAggregate (47) + +- Project (46) + +- SortMergeJoin LeftAnti (45) + :- Sort (41) + : +- Exchange (40) + : +- Filter (39) + : +- Scan parquet (38) + +- Sort (44) + +- Exchange (43) + +- Scan parquet (42) (1) Scan parquet @@ -210,191 +212,199 @@ Condition : ((isnotnull(c_acctbal#X) AND substring(c_phone#X, 1, 2) IN (13,31,23 Input [3]: [c_custkey#X, c_phone#X, c_acctbal#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(41) Scan parquet +(41) Sort +Input [3]: [c_custkey#X, c_phone#X, c_acctbal#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(42) Scan parquet Output [1]: [o_custkey#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(42) Exchange +(43) Exchange Input [1]: [o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(43) ShuffledHashJoin +(44) Sort +Input [1]: [o_custkey#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(45) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join condition: None -(44) Project +(46) Project Output [2]: [substring(c_phone#X, 1, 2) AS cntrycode#X, c_acctbal#X] Input [3]: [c_custkey#X, c_phone#X, c_acctbal#X] -(45) HashAggregate +(47) HashAggregate Input [2]: [cntrycode#X, c_acctbal#X] Keys [1]: [cntrycode#X] Functions [2]: [partial_count(1), partial_sum(c_acctbal#X)] Aggregate Attributes [3]: [count#X, sum#X, isEmpty#X] Results [4]: [cntrycode#X, count#X, sum#X, isEmpty#X] -(46) Exchange +(48) Exchange Input [4]: [cntrycode#X, count#X, sum#X, isEmpty#X] Arguments: hashpartitioning(cntrycode#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(47) HashAggregate +(49) HashAggregate Input [4]: [cntrycode#X, count#X, sum#X, isEmpty#X] Keys [1]: [cntrycode#X] Functions [2]: [count(1), sum(c_acctbal#X)] Aggregate Attributes [2]: [count(1)#X, sum(c_acctbal#X)#X] Results [3]: [cntrycode#X, count(1)#X AS numcust#X, sum(c_acctbal#X)#X AS totacctbal#X] -(48) Exchange +(50) Exchange Input [3]: [cntrycode#X, numcust#X, totacctbal#X] Arguments: rangepartitioning(cntrycode#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(49) Sort +(51) Sort Input [3]: [cntrycode#X, numcust#X, totacctbal#X] Arguments: [cntrycode#X ASC NULLS FIRST], true, 0 -(50) AdaptiveSparkPlan +(52) AdaptiveSparkPlan Output [3]: [cntrycode#X, numcust#X, totacctbal#X] Arguments: isFinalPlan=true ===== Subqueries ===== Subquery:1 Hosting operator id = 2 Hosting Expression = Subquery subquery#X, [id=#X] -AdaptiveSparkPlan (70) +AdaptiveSparkPlan (72) +- == Final Plan == - VeloxColumnarToRowExec (63) - +- ^ RegularHashAggregateExecTransformer (61) - +- ^ InputIteratorTransformer (60) - +- ShuffleQueryStage (58), Statistics(X) - +- ColumnarExchange (57) - +- VeloxAppendBatches (56) - +- ^ FlushableHashAggregateExecTransformer (54) - +- ^ ProjectExecTransformer (53) - +- ^ FilterExecTransformer (52) - +- ^ Scan parquet (51) + VeloxColumnarToRowExec (65) + +- ^ RegularHashAggregateExecTransformer (63) + +- ^ InputIteratorTransformer (62) + +- ShuffleQueryStage (60), Statistics(X) + +- ColumnarExchange (59) + +- VeloxAppendBatches (58) + +- ^ FlushableHashAggregateExecTransformer (56) + +- ^ ProjectExecTransformer (55) + +- ^ FilterExecTransformer (54) + +- ^ Scan parquet (53) +- == Initial Plan == - HashAggregate (69) - +- Exchange (68) - +- HashAggregate (67) - +- Project (66) - +- Filter (65) - +- Scan parquet (64) + HashAggregate (71) + +- Exchange (70) + +- HashAggregate (69) + +- Project (68) + +- Filter (67) + +- Scan parquet (66) -(51) Scan parquet +(53) Scan parquet Output [2]: [c_phone#X, c_acctbal#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(c_acctbal), GreaterThan(c_acctbal,0.00)] ReadSchema: struct -(52) FilterExecTransformer +(54) FilterExecTransformer Input [2]: [c_phone#X, c_acctbal#X] Arguments: ((isnotnull(c_acctbal#X) AND (c_acctbal#X > 0.00)) AND substring(c_phone#X, 1, 2) IN (13,31,23,29,30,18,17)) -(53) ProjectExecTransformer +(55) ProjectExecTransformer Output [1]: [c_acctbal#X] Input [2]: [c_phone#X, c_acctbal#X] -(54) FlushableHashAggregateExecTransformer +(56) FlushableHashAggregateExecTransformer Input [1]: [c_acctbal#X] Keys: [] Functions [1]: [partial_avg(c_acctbal#X)] Aggregate Attributes [2]: [sum#X, count#X] Results [2]: [sum#X, count#X] -(55) WholeStageCodegenTransformer (X) +(57) WholeStageCodegenTransformer (X) Input [2]: [sum#X, count#X] Arguments: false -(56) VeloxAppendBatches +(58) VeloxAppendBatches Input [2]: [sum#X, count#X] Arguments: X -(57) ColumnarExchange +(59) ColumnarExchange Input [2]: [sum#X, count#X] Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=X], [id=#X] -(58) ShuffleQueryStage +(60) ShuffleQueryStage Output [2]: [sum#X, count#X] Arguments: X -(59) InputAdapter +(61) InputAdapter Input [2]: [sum#X, count#X] -(60) InputIteratorTransformer +(62) InputIteratorTransformer Input [2]: [sum#X, count#X] -(61) RegularHashAggregateExecTransformer +(63) RegularHashAggregateExecTransformer Input [2]: [sum#X, count#X] Keys: [] Functions [1]: [avg(c_acctbal#X)] Aggregate Attributes [1]: [avg(c_acctbal#X)#X] Results [1]: [avg(c_acctbal#X)#X AS avg(c_acctbal)#X] -(62) WholeStageCodegenTransformer (X) +(64) WholeStageCodegenTransformer (X) Input [1]: [avg(c_acctbal)#X] Arguments: false -(63) VeloxColumnarToRowExec +(65) VeloxColumnarToRowExec Input [1]: [avg(c_acctbal)#X] -(64) Scan parquet +(66) Scan parquet Output [2]: [c_phone#X, c_acctbal#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(c_acctbal), GreaterThan(c_acctbal,0.00)] ReadSchema: struct -(65) Filter +(67) Filter Input [2]: [c_phone#X, c_acctbal#X] Condition : ((isnotnull(c_acctbal#X) AND (c_acctbal#X > 0.00)) AND substring(c_phone#X, 1, 2) IN (13,31,23,29,30,18,17)) -(66) Project +(68) Project Output [1]: [c_acctbal#X] Input [2]: [c_phone#X, c_acctbal#X] -(67) HashAggregate +(69) HashAggregate Input [1]: [c_acctbal#X] Keys: [] Functions [1]: [partial_avg(c_acctbal#X)] Aggregate Attributes [2]: [sum#X, count#X] Results [2]: [sum#X, count#X] -(68) Exchange +(70) Exchange Input [2]: [sum#X, count#X] Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=X] -(69) HashAggregate +(71) HashAggregate Input [2]: [sum#X, count#X] Keys: [] Functions [1]: [avg(c_acctbal#X)] Aggregate Attributes [1]: [avg(c_acctbal#X)#X] Results [1]: [avg(c_acctbal#X)#X AS avg(c_acctbal)#X] -(70) AdaptiveSparkPlan +(72) AdaptiveSparkPlan Output [1]: [avg(c_acctbal)#X] Arguments: isFinalPlan=true Subquery:2 Hosting operator id = 1 Hosting Expression = Subquery subquery#X, [id=#X] -AdaptiveSparkPlan (70) +AdaptiveSparkPlan (72) +- == Final Plan == - VeloxColumnarToRowExec (63) - +- ^ RegularHashAggregateExecTransformer (61) - +- ^ InputIteratorTransformer (60) - +- ShuffleQueryStage (58), Statistics(X) - +- ColumnarExchange (57) - +- VeloxAppendBatches (56) - +- ^ FlushableHashAggregateExecTransformer (54) - +- ^ ProjectExecTransformer (53) - +- ^ FilterExecTransformer (52) - +- ^ Scan parquet (51) + VeloxColumnarToRowExec (65) + +- ^ RegularHashAggregateExecTransformer (63) + +- ^ InputIteratorTransformer (62) + +- ShuffleQueryStage (60), Statistics(X) + +- ColumnarExchange (59) + +- VeloxAppendBatches (58) + +- ^ FlushableHashAggregateExecTransformer (56) + +- ^ ProjectExecTransformer (55) + +- ^ FilterExecTransformer (54) + +- ^ Scan parquet (53) +- == Initial Plan == - HashAggregate (69) - +- Exchange (68) - +- HashAggregate (67) - +- Project (66) - +- Filter (65) - +- Scan parquet (64) \ No newline at end of file + HashAggregate (71) + +- Exchange (70) + +- HashAggregate (69) + +- Project (68) + +- Filter (67) + +- Scan parquet (66) \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/3.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/3.txt index 51408f03e4e7..df17819cafe9 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/3.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/3.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (63) +AdaptiveSparkPlan (67) +- == Final Plan == VeloxColumnarToRowExec (43) +- TakeOrderedAndProjectExecTransformer (42) @@ -36,25 +36,29 @@ AdaptiveSparkPlan (63) +- ^ FilterExecTransformer (28) +- ^ Scan parquet (27) +- == Initial Plan == - TakeOrderedAndProject (62) - +- HashAggregate (61) - +- HashAggregate (60) - +- Project (59) - +- ShuffledHashJoin Inner BuildRight (58) - :- Exchange (53) - : +- Project (52) - : +- ShuffledHashJoin Inner BuildLeft (51) - : :- Exchange (47) - : : +- Project (46) - : : +- Filter (45) - : : +- Scan parquet (44) - : +- Exchange (50) - : +- Filter (49) - : +- Scan parquet (48) - +- Exchange (57) - +- Project (56) - +- Filter (55) - +- Scan parquet (54) + TakeOrderedAndProject (66) + +- HashAggregate (65) + +- HashAggregate (64) + +- Project (63) + +- SortMergeJoin Inner (62) + :- Sort (56) + : +- Exchange (55) + : +- Project (54) + : +- SortMergeJoin Inner (53) + : :- Sort (48) + : : +- Exchange (47) + : : +- Project (46) + : : +- Filter (45) + : : +- Scan parquet (44) + : +- Sort (52) + : +- Exchange (51) + : +- Filter (50) + : +- Scan parquet (49) + +- Sort (61) + +- Exchange (60) + +- Project (59) + +- Filter (58) + +- Scan parquet (57) (1) Scan parquet @@ -256,80 +260,96 @@ Input [2]: [c_custkey#X, c_mktsegment#X] Input [1]: [c_custkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(48) Scan parquet +(48) Sort +Input [1]: [c_custkey#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(49) Scan parquet Output [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderdate), LessThan(o_orderdate,1995-03-15), IsNotNull(o_custkey), IsNotNull(o_orderkey)] ReadSchema: struct -(49) Filter +(50) Filter Input [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Condition : (((isnotnull(o_orderdate#X) AND (o_orderdate#X < 1995-03-15)) AND isnotnull(o_custkey#X)) AND isnotnull(o_orderkey#X)) -(50) Exchange +(51) Exchange Input [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(51) ShuffledHashJoin +(52) Sort +Input [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(53) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join condition: None -(52) Project +(54) Project Output [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] Input [5]: [c_custkey#X, o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] -(53) Exchange +(55) Exchange Input [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(54) Scan parquet +(56) Sort +Input [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(57) Scan parquet Output [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThan(l_shipdate,1995-03-15), IsNotNull(l_orderkey)] ReadSchema: struct -(55) Filter +(58) Filter Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : ((isnotnull(l_shipdate#X) AND (l_shipdate#X > 1995-03-15)) AND isnotnull(l_orderkey#X)) -(56) Project +(59) Project Output [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(57) Exchange +(60) Exchange Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(58) ShuffledHashJoin +(61) Sort +Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(62) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(59) Project +(63) Project Output [5]: [o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] Input [6]: [o_orderkey#X, o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] -(60) HashAggregate +(64) HashAggregate Input [5]: [o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] Keys [3]: [l_orderkey#X, o_orderdate#X, o_shippriority#X] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [5]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] -(61) HashAggregate +(65) HashAggregate Input [5]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] Keys [3]: [l_orderkey#X, o_orderdate#X, o_shippriority#X] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] Results [4]: [l_orderkey#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X AS revenue#X, o_orderdate#X, o_shippriority#X] -(62) TakeOrderedAndProject +(66) TakeOrderedAndProject Input [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] Arguments: X, [revenue#X DESC NULLS LAST, o_orderdate#X ASC NULLS FIRST], [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] -(63) AdaptiveSparkPlan +(67) AdaptiveSparkPlan Output [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/4.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/4.txt index 4da32d7f70ac..85d303df874f 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/4.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/4.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (54) +AdaptiveSparkPlan (56) +- == Final Plan == VeloxColumnarToRowExec (38) +- ^ SortExecTransformer (36) @@ -31,21 +31,23 @@ AdaptiveSparkPlan (54) +- ^ FilterExecTransformer (11) +- ^ Scan parquet (10) +- == Initial Plan == - Sort (53) - +- Exchange (52) - +- HashAggregate (51) - +- Exchange (50) - +- HashAggregate (49) - +- Project (48) - +- ShuffledHashJoin LeftSemi BuildRight (47) - :- Exchange (42) - : +- Project (41) - : +- Filter (40) - : +- Scan parquet (39) - +- Exchange (46) - +- Project (45) - +- Filter (44) - +- Scan parquet (43) + Sort (55) + +- Exchange (54) + +- HashAggregate (53) + +- Exchange (52) + +- HashAggregate (51) + +- Project (50) + +- SortMergeJoin LeftSemi (49) + :- Sort (43) + : +- Exchange (42) + : +- Project (41) + : +- Filter (40) + : +- Scan parquet (39) + +- Sort (48) + +- Exchange (47) + +- Project (46) + +- Filter (45) + +- Scan parquet (44) (1) Scan parquet @@ -223,60 +225,68 @@ Input [3]: [o_orderkey#X, o_orderdate#X, o_orderpriority#X] Input [2]: [o_orderkey#X, o_orderpriority#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(43) Scan parquet +(43) Sort +Input [2]: [o_orderkey#X, o_orderpriority#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(44) Scan parquet Output [3]: [l_orderkey#X, l_commitdate#X, l_receiptdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_commitdate), IsNotNull(l_receiptdate)] ReadSchema: struct -(44) Filter +(45) Filter Input [3]: [l_orderkey#X, l_commitdate#X, l_receiptdate#X] Condition : ((isnotnull(l_commitdate#X) AND isnotnull(l_receiptdate#X)) AND (l_commitdate#X < l_receiptdate#X)) -(45) Project +(46) Project Output [1]: [l_orderkey#X] Input [3]: [l_orderkey#X, l_commitdate#X, l_receiptdate#X] -(46) Exchange +(47) Exchange Input [1]: [l_orderkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(47) ShuffledHashJoin +(48) Sort +Input [1]: [l_orderkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(49) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(48) Project +(50) Project Output [1]: [o_orderpriority#X] Input [2]: [o_orderkey#X, o_orderpriority#X] -(49) HashAggregate +(51) HashAggregate Input [1]: [o_orderpriority#X] Keys [1]: [o_orderpriority#X] Functions [1]: [partial_count(1)] Aggregate Attributes [1]: [count#X] Results [2]: [o_orderpriority#X, count#X] -(50) Exchange +(52) Exchange Input [2]: [o_orderpriority#X, count#X] Arguments: hashpartitioning(o_orderpriority#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(51) HashAggregate +(53) HashAggregate Input [2]: [o_orderpriority#X, count#X] Keys [1]: [o_orderpriority#X] Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#X] Results [2]: [o_orderpriority#X, count(1)#X AS order_count#X] -(52) Exchange +(54) Exchange Input [2]: [o_orderpriority#X, order_count#X] Arguments: rangepartitioning(o_orderpriority#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(53) Sort +(55) Sort Input [2]: [o_orderpriority#X, order_count#X] Arguments: [o_orderpriority#X ASC NULLS FIRST], true, 0 -(54) AdaptiveSparkPlan +(56) AdaptiveSparkPlan Output [2]: [o_orderpriority#X, order_count#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/5.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/5.txt index 2669a9fce3ae..8978f9563c68 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/5.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/5.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (146) +AdaptiveSparkPlan (156) +- == Final Plan == VeloxColumnarToRowExec (106) +- ^ SortExecTransformer (104) @@ -83,45 +83,55 @@ AdaptiveSparkPlan (146) +- ^ FilterExecTransformer (79) +- ^ Scan parquet (78) +- == Initial Plan == - Sort (145) - +- Exchange (144) - +- HashAggregate (143) - +- Exchange (142) - +- HashAggregate (141) - +- Project (140) - +- ShuffledHashJoin Inner BuildRight (139) - :- Exchange (134) - : +- Project (133) - : +- ShuffledHashJoin Inner BuildRight (132) - : :- Exchange (128) - : : +- Project (127) - : : +- ShuffledHashJoin Inner BuildRight (126) - : : :- Exchange (122) - : : : +- Project (121) - : : : +- ShuffledHashJoin Inner BuildRight (120) - : : : :- Exchange (116) - : : : : +- Project (115) - : : : : +- ShuffledHashJoin Inner BuildLeft (114) - : : : : :- Exchange (109) - : : : : : +- Filter (108) - : : : : : +- Scan parquet (107) - : : : : +- Exchange (113) - : : : : +- Project (112) - : : : : +- Filter (111) - : : : : +- Scan parquet (110) - : : : +- Exchange (119) - : : : +- Filter (118) - : : : +- Scan parquet (117) - : : +- Exchange (125) - : : +- Filter (124) - : : +- Scan parquet (123) - : +- Exchange (131) - : +- Filter (130) - : +- Scan parquet (129) - +- Exchange (138) - +- Project (137) - +- Filter (136) - +- Scan parquet (135) + Sort (155) + +- Exchange (154) + +- HashAggregate (153) + +- Exchange (152) + +- HashAggregate (151) + +- Project (150) + +- SortMergeJoin Inner (149) + :- Sort (143) + : +- Exchange (142) + : +- Project (141) + : +- SortMergeJoin Inner (140) + : :- Sort (135) + : : +- Exchange (134) + : : +- Project (133) + : : +- SortMergeJoin Inner (132) + : : :- Sort (127) + : : : +- Exchange (126) + : : : +- Project (125) + : : : +- SortMergeJoin Inner (124) + : : : :- Sort (119) + : : : : +- Exchange (118) + : : : : +- Project (117) + : : : : +- SortMergeJoin Inner (116) + : : : : :- Sort (110) + : : : : : +- Exchange (109) + : : : : : +- Filter (108) + : : : : : +- Scan parquet (107) + : : : : +- Sort (115) + : : : : +- Exchange (114) + : : : : +- Project (113) + : : : : +- Filter (112) + : : : : +- Scan parquet (111) + : : : +- Sort (123) + : : : +- Exchange (122) + : : : +- Filter (121) + : : : +- Scan parquet (120) + : : +- Sort (131) + : : +- Exchange (130) + : : +- Filter (129) + : : +- Scan parquet (128) + : +- Sort (139) + : +- Exchange (138) + : +- Filter (137) + : +- Scan parquet (136) + +- Sort (148) + +- Exchange (147) + +- Project (146) + +- Filter (145) + +- Scan parquet (144) (1) Scan parquet @@ -567,176 +577,216 @@ Condition : (isnotnull(c_custkey#X) AND isnotnull(c_nationkey#X)) Input [2]: [c_custkey#X, c_nationkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(110) Scan parquet +(110) Sort +Input [2]: [c_custkey#X, c_nationkey#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(111) Scan parquet Output [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderdate), GreaterThanOrEqual(o_orderdate,1994-01-01), LessThan(o_orderdate,1995-01-01), IsNotNull(o_custkey), IsNotNull(o_orderkey)] ReadSchema: struct -(111) Filter +(112) Filter Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Condition : ((((isnotnull(o_orderdate#X) AND (o_orderdate#X >= 1994-01-01)) AND (o_orderdate#X < 1995-01-01)) AND isnotnull(o_custkey#X)) AND isnotnull(o_orderkey#X)) -(112) Project +(113) Project Output [2]: [o_orderkey#X, o_custkey#X] Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] -(113) Exchange +(114) Exchange Input [2]: [o_orderkey#X, o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(114) ShuffledHashJoin +(115) Sort +Input [2]: [o_orderkey#X, o_custkey#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(116) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join condition: None -(115) Project +(117) Project Output [2]: [c_nationkey#X, o_orderkey#X] Input [4]: [c_custkey#X, c_nationkey#X, o_orderkey#X, o_custkey#X] -(116) Exchange +(118) Exchange Input [2]: [c_nationkey#X, o_orderkey#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(117) Scan parquet +(119) Sort +Input [2]: [c_nationkey#X, o_orderkey#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(120) Scan parquet Output [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_orderkey), IsNotNull(l_suppkey)] ReadSchema: struct -(118) Filter +(121) Filter Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Condition : (isnotnull(l_orderkey#X) AND isnotnull(l_suppkey#X)) -(119) Exchange +(122) Exchange Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(120) ShuffledHashJoin +(123) Sort +Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(124) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join condition: None -(121) Project +(125) Project Output [4]: [c_nationkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Input [6]: [c_nationkey#X, o_orderkey#X, l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] -(122) Exchange +(126) Exchange Input [4]: [c_nationkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_suppkey#X, c_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(123) Scan parquet +(127) Sort +Input [4]: [c_nationkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_suppkey#X ASC NULLS FIRST, c_nationkey#X ASC NULLS FIRST], false, 0 + +(128) Scan parquet Output [2]: [s_suppkey#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct -(124) Filter +(129) Filter Input [2]: [s_suppkey#X, s_nationkey#X] Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) -(125) Exchange +(130) Exchange Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(126) ShuffledHashJoin +(131) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST, s_nationkey#X ASC NULLS FIRST], false, 0 + +(132) SortMergeJoin Left keys [2]: [l_suppkey#X, c_nationkey#X] Right keys [2]: [s_suppkey#X, s_nationkey#X] Join condition: None -(127) Project +(133) Project Output [3]: [l_extendedprice#X, l_discount#X, s_nationkey#X] Input [6]: [c_nationkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, s_suppkey#X, s_nationkey#X] -(128) Exchange +(134) Exchange Input [3]: [l_extendedprice#X, l_discount#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(129) Scan parquet +(135) Sort +Input [3]: [l_extendedprice#X, l_discount#X, s_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(136) Scan parquet Output [3]: [n_nationkey#X, n_name#X, n_regionkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey), IsNotNull(n_regionkey)] ReadSchema: struct -(130) Filter +(137) Filter Input [3]: [n_nationkey#X, n_name#X, n_regionkey#X] Condition : (isnotnull(n_nationkey#X) AND isnotnull(n_regionkey#X)) -(131) Exchange +(138) Exchange Input [3]: [n_nationkey#X, n_name#X, n_regionkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(132) ShuffledHashJoin +(139) Sort +Input [3]: [n_nationkey#X, n_name#X, n_regionkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(140) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(133) Project +(141) Project Output [4]: [l_extendedprice#X, l_discount#X, n_name#X, n_regionkey#X] Input [6]: [l_extendedprice#X, l_discount#X, s_nationkey#X, n_nationkey#X, n_name#X, n_regionkey#X] -(134) Exchange +(142) Exchange Input [4]: [l_extendedprice#X, l_discount#X, n_name#X, n_regionkey#X] Arguments: hashpartitioning(n_regionkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(135) Scan parquet +(143) Sort +Input [4]: [l_extendedprice#X, l_discount#X, n_name#X, n_regionkey#X] +Arguments: [n_regionkey#X ASC NULLS FIRST], false, 0 + +(144) Scan parquet Output [2]: [r_regionkey#X, r_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(r_name), EqualTo(r_name,ASIA), IsNotNull(r_regionkey)] ReadSchema: struct -(136) Filter +(145) Filter Input [2]: [r_regionkey#X, r_name#X] Condition : ((isnotnull(r_name#X) AND (r_name#X = ASIA)) AND isnotnull(r_regionkey#X)) -(137) Project +(146) Project Output [1]: [r_regionkey#X] Input [2]: [r_regionkey#X, r_name#X] -(138) Exchange +(147) Exchange Input [1]: [r_regionkey#X] Arguments: hashpartitioning(r_regionkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(139) ShuffledHashJoin +(148) Sort +Input [1]: [r_regionkey#X] +Arguments: [r_regionkey#X ASC NULLS FIRST], false, 0 + +(149) SortMergeJoin Left keys [1]: [n_regionkey#X] Right keys [1]: [r_regionkey#X] Join condition: None -(140) Project +(150) Project Output [3]: [l_extendedprice#X, l_discount#X, n_name#X] Input [5]: [l_extendedprice#X, l_discount#X, n_name#X, n_regionkey#X, r_regionkey#X] -(141) HashAggregate +(151) HashAggregate Input [3]: [l_extendedprice#X, l_discount#X, n_name#X] Keys [1]: [n_name#X] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [n_name#X, sum#X, isEmpty#X] -(142) Exchange +(152) Exchange Input [3]: [n_name#X, sum#X, isEmpty#X] Arguments: hashpartitioning(n_name#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(143) HashAggregate +(153) HashAggregate Input [3]: [n_name#X, sum#X, isEmpty#X] Keys [1]: [n_name#X] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X] Results [2]: [n_name#X, sum(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)))#X AS revenue#X] -(144) Exchange +(154) Exchange Input [2]: [n_name#X, revenue#X] Arguments: rangepartitioning(revenue#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(145) Sort +(155) Sort Input [2]: [n_name#X, revenue#X] Arguments: [revenue#X DESC NULLS LAST], true, 0 -(146) AdaptiveSparkPlan +(156) AdaptiveSparkPlan Output [2]: [n_name#X, revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/7.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/7.txt index b5abf7e36164..244f650f3a72 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/7.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/7.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (139) +AdaptiveSparkPlan (149) +- == Final Plan == VeloxColumnarToRowExec (101) +- ^ SortExecTransformer (99) @@ -79,43 +79,53 @@ AdaptiveSparkPlan (139) +- ShuffleQueryStage (79), Statistics(X) +- ReusedExchange (78) +- == Initial Plan == - Sort (138) - +- Exchange (137) - +- HashAggregate (136) - +- Exchange (135) - +- HashAggregate (134) - +- Project (133) - +- ShuffledHashJoin Inner BuildRight (132) - :- Exchange (128) - : +- Project (127) - : +- ShuffledHashJoin Inner BuildRight (126) - : :- Exchange (122) - : : +- Project (121) - : : +- ShuffledHashJoin Inner BuildRight (120) - : : :- Exchange (116) - : : : +- Project (115) - : : : +- ShuffledHashJoin Inner BuildRight (114) - : : : :- Exchange (110) - : : : : +- Project (109) - : : : : +- ShuffledHashJoin Inner BuildLeft (108) - : : : : :- Exchange (104) - : : : : : +- Filter (103) - : : : : : +- Scan parquet (102) - : : : : +- Exchange (107) - : : : : +- Filter (106) - : : : : +- Scan parquet (105) - : : : +- Exchange (113) - : : : +- Filter (112) - : : : +- Scan parquet (111) - : : +- Exchange (119) - : : +- Filter (118) - : : +- Scan parquet (117) - : +- Exchange (125) - : +- Filter (124) - : +- Scan parquet (123) - +- Exchange (131) - +- Filter (130) - +- Scan parquet (129) + Sort (148) + +- Exchange (147) + +- HashAggregate (146) + +- Exchange (145) + +- HashAggregate (144) + +- Project (143) + +- SortMergeJoin Inner (142) + :- Sort (137) + : +- Exchange (136) + : +- Project (135) + : +- SortMergeJoin Inner (134) + : :- Sort (129) + : : +- Exchange (128) + : : +- Project (127) + : : +- SortMergeJoin Inner (126) + : : :- Sort (121) + : : : +- Exchange (120) + : : : +- Project (119) + : : : +- SortMergeJoin Inner (118) + : : : :- Sort (113) + : : : : +- Exchange (112) + : : : : +- Project (111) + : : : : +- SortMergeJoin Inner (110) + : : : : :- Sort (105) + : : : : : +- Exchange (104) + : : : : : +- Filter (103) + : : : : : +- Scan parquet (102) + : : : : +- Sort (109) + : : : : +- Exchange (108) + : : : : +- Filter (107) + : : : : +- Scan parquet (106) + : : : +- Sort (117) + : : : +- Exchange (116) + : : : +- Filter (115) + : : : +- Scan parquet (114) + : : +- Sort (125) + : : +- Exchange (124) + : : +- Filter (123) + : : +- Scan parquet (122) + : +- Sort (133) + : +- Exchange (132) + : +- Filter (131) + : +- Scan parquet (130) + +- Sort (141) + +- Exchange (140) + +- Filter (139) + +- Scan parquet (138) (1) Scan parquet @@ -537,168 +547,208 @@ Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(105) Scan parquet +(105) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(106) Scan parquet Output [5]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1995-01-01), LessThanOrEqual(l_shipdate,1996-12-31), IsNotNull(l_suppkey), IsNotNull(l_orderkey)] ReadSchema: struct -(106) Filter +(107) Filter Input [5]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : ((((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1995-01-01)) AND (l_shipdate#X <= 1996-12-31)) AND isnotnull(l_suppkey#X)) AND isnotnull(l_orderkey#X)) -(107) Exchange +(108) Exchange Input [5]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(108) ShuffledHashJoin +(109) Sort +Input [5]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Arguments: [l_suppkey#X ASC NULLS FIRST], false, 0 + +(110) SortMergeJoin Left keys [1]: [s_suppkey#X] Right keys [1]: [l_suppkey#X] Join condition: None -(109) Project +(111) Project Output [5]: [s_nationkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Input [7]: [s_suppkey#X, s_nationkey#X, l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(110) Exchange +(112) Exchange Input [5]: [s_nationkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(111) Scan parquet +(113) Sort +Input [5]: [s_nationkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(114) Scan parquet Output [2]: [o_orderkey#X, o_custkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderkey), IsNotNull(o_custkey)] ReadSchema: struct -(112) Filter +(115) Filter Input [2]: [o_orderkey#X, o_custkey#X] Condition : (isnotnull(o_orderkey#X) AND isnotnull(o_custkey#X)) -(113) Exchange +(116) Exchange Input [2]: [o_orderkey#X, o_custkey#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(114) ShuffledHashJoin +(117) Sort +Input [2]: [o_orderkey#X, o_custkey#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(118) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [o_orderkey#X] Join condition: None -(115) Project +(119) Project Output [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_custkey#X] Input [7]: [s_nationkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_orderkey#X, o_custkey#X] -(116) Exchange +(120) Exchange Input [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(117) Scan parquet +(121) Sort +Input [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_custkey#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(122) Scan parquet Output [2]: [c_custkey#X, c_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(c_custkey), IsNotNull(c_nationkey)] ReadSchema: struct -(118) Filter +(123) Filter Input [2]: [c_custkey#X, c_nationkey#X] Condition : (isnotnull(c_custkey#X) AND isnotnull(c_nationkey#X)) -(119) Exchange +(124) Exchange Input [2]: [c_custkey#X, c_nationkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(120) ShuffledHashJoin +(125) Sort +Input [2]: [c_custkey#X, c_nationkey#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(126) SortMergeJoin Left keys [1]: [o_custkey#X] Right keys [1]: [c_custkey#X] Join condition: None -(121) Project +(127) Project Output [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X] Input [7]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_custkey#X, c_custkey#X, c_nationkey#X] -(122) Exchange +(128) Exchange Input [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(123) Scan parquet +(129) Sort +Input [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(130) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey), Or(EqualTo(n_name,FRANCE),EqualTo(n_name,GERMANY))] ReadSchema: struct -(124) Filter +(131) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : (isnotnull(n_nationkey#X) AND ((n_name#X = FRANCE) OR (n_name#X = GERMANY))) -(125) Exchange +(132) Exchange Input [2]: [n_nationkey#X, n_name#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(126) ShuffledHashJoin +(133) Sort +Input [2]: [n_nationkey#X, n_name#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(134) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(127) Project +(135) Project Output [5]: [l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_name#X] Input [7]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_nationkey#X, n_name#X] -(128) Exchange +(136) Exchange Input [5]: [l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_name#X] Arguments: hashpartitioning(c_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(129) Scan parquet +(137) Sort +Input [5]: [l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_name#X] +Arguments: [c_nationkey#X ASC NULLS FIRST], false, 0 + +(138) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey), Or(EqualTo(n_name,GERMANY),EqualTo(n_name,FRANCE))] ReadSchema: struct -(130) Filter +(139) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : (isnotnull(n_nationkey#X) AND ((n_name#X = GERMANY) OR (n_name#X = FRANCE))) -(131) Exchange +(140) Exchange Input [2]: [n_nationkey#X, n_name#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(132) ShuffledHashJoin +(141) Sort +Input [2]: [n_nationkey#X, n_name#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(142) SortMergeJoin Left keys [1]: [c_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: (((n_name#X = FRANCE) AND (n_name#X = GERMANY)) OR ((n_name#X = GERMANY) AND (n_name#X = FRANCE))) -(133) Project +(143) Project Output [4]: [n_name#X AS supp_nation#X, n_name#X AS cust_nation#X, year(l_shipdate#X) AS l_year#X, CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)) AS volume#X] Input [7]: [l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_name#X, n_nationkey#X, n_name#X] -(134) HashAggregate +(144) HashAggregate Input [4]: [supp_nation#X, cust_nation#X, l_year#X, volume#X] Keys [3]: [supp_nation#X, cust_nation#X, l_year#X] Functions [1]: [partial_sum(volume#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [5]: [supp_nation#X, cust_nation#X, l_year#X, sum#X, isEmpty#X] -(135) Exchange +(145) Exchange Input [5]: [supp_nation#X, cust_nation#X, l_year#X, sum#X, isEmpty#X] Arguments: hashpartitioning(supp_nation#X, cust_nation#X, l_year#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(136) HashAggregate +(146) HashAggregate Input [5]: [supp_nation#X, cust_nation#X, l_year#X, sum#X, isEmpty#X] Keys [3]: [supp_nation#X, cust_nation#X, l_year#X] Functions [1]: [sum(volume#X)] Aggregate Attributes [1]: [sum(volume#X)#X] Results [4]: [supp_nation#X, cust_nation#X, l_year#X, sum(volume#X)#X AS revenue#X] -(137) Exchange +(147) Exchange Input [4]: [supp_nation#X, cust_nation#X, l_year#X, revenue#X] Arguments: rangepartitioning(supp_nation#X ASC NULLS FIRST, cust_nation#X ASC NULLS FIRST, l_year#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(138) Sort +(148) Sort Input [4]: [supp_nation#X, cust_nation#X, l_year#X, revenue#X] Arguments: [supp_nation#X ASC NULLS FIRST, cust_nation#X ASC NULLS FIRST, l_year#X ASC NULLS FIRST], true, 0 -(139) AdaptiveSparkPlan +(149) AdaptiveSparkPlan Output [4]: [supp_nation#X, cust_nation#X, l_year#X, revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/8.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/8.txt index 47886e292bf7..282790ba6507 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/8.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/8.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (193) +AdaptiveSparkPlan (207) +- == Final Plan == VeloxColumnarToRowExec (141) +- ^ SortExecTransformer (139) @@ -110,57 +110,71 @@ AdaptiveSparkPlan (193) +- ^ FilterExecTransformer (113) +- ^ Scan parquet (112) +- == Initial Plan == - Sort (192) - +- Exchange (191) - +- HashAggregate (190) - +- Exchange (189) - +- HashAggregate (188) - +- Project (187) - +- ShuffledHashJoin Inner BuildRight (186) - :- Exchange (181) - : +- Project (180) - : +- ShuffledHashJoin Inner BuildRight (179) - : :- Exchange (175) - : : +- Project (174) - : : +- ShuffledHashJoin Inner BuildRight (173) - : : :- Exchange (169) - : : : +- Project (168) - : : : +- ShuffledHashJoin Inner BuildRight (167) - : : : :- Exchange (163) - : : : : +- Project (162) - : : : : +- ShuffledHashJoin Inner BuildRight (161) - : : : : :- Exchange (157) - : : : : : +- Project (156) - : : : : : +- ShuffledHashJoin Inner BuildRight (155) - : : : : : :- Exchange (151) - : : : : : : +- Project (150) - : : : : : : +- ShuffledHashJoin Inner BuildLeft (149) - : : : : : : :- Exchange (145) - : : : : : : : +- Project (144) - : : : : : : : +- Filter (143) - : : : : : : : +- Scan parquet (142) - : : : : : : +- Exchange (148) - : : : : : : +- Filter (147) - : : : : : : +- Scan parquet (146) - : : : : : +- Exchange (154) - : : : : : +- Filter (153) - : : : : : +- Scan parquet (152) - : : : : +- Exchange (160) - : : : : +- Filter (159) - : : : : +- Scan parquet (158) - : : : +- Exchange (166) - : : : +- Filter (165) - : : : +- Scan parquet (164) - : : +- Exchange (172) - : : +- Filter (171) - : : +- Scan parquet (170) - : +- Exchange (178) - : +- Filter (177) - : +- Scan parquet (176) - +- Exchange (185) - +- Project (184) - +- Filter (183) - +- Scan parquet (182) + Sort (206) + +- Exchange (205) + +- HashAggregate (204) + +- Exchange (203) + +- HashAggregate (202) + +- Project (201) + +- SortMergeJoin Inner (200) + :- Sort (194) + : +- Exchange (193) + : +- Project (192) + : +- SortMergeJoin Inner (191) + : :- Sort (186) + : : +- Exchange (185) + : : +- Project (184) + : : +- SortMergeJoin Inner (183) + : : :- Sort (178) + : : : +- Exchange (177) + : : : +- Project (176) + : : : +- SortMergeJoin Inner (175) + : : : :- Sort (170) + : : : : +- Exchange (169) + : : : : +- Project (168) + : : : : +- SortMergeJoin Inner (167) + : : : : :- Sort (162) + : : : : : +- Exchange (161) + : : : : : +- Project (160) + : : : : : +- SortMergeJoin Inner (159) + : : : : : :- Sort (154) + : : : : : : +- Exchange (153) + : : : : : : +- Project (152) + : : : : : : +- SortMergeJoin Inner (151) + : : : : : : :- Sort (146) + : : : : : : : +- Exchange (145) + : : : : : : : +- Project (144) + : : : : : : : +- Filter (143) + : : : : : : : +- Scan parquet (142) + : : : : : : +- Sort (150) + : : : : : : +- Exchange (149) + : : : : : : +- Filter (148) + : : : : : : +- Scan parquet (147) + : : : : : +- Sort (158) + : : : : : +- Exchange (157) + : : : : : +- Filter (156) + : : : : : +- Scan parquet (155) + : : : : +- Sort (166) + : : : : +- Exchange (165) + : : : : +- Filter (164) + : : : : +- Scan parquet (163) + : : : +- Sort (174) + : : : +- Exchange (173) + : : : +- Filter (172) + : : : +- Scan parquet (171) + : : +- Sort (182) + : : +- Exchange (181) + : : +- Filter (180) + : : +- Scan parquet (179) + : +- Sort (190) + : +- Exchange (189) + : +- Filter (188) + : +- Scan parquet (187) + +- Sort (199) + +- Exchange (198) + +- Project (197) + +- Filter (196) + +- Scan parquet (195) (1) Scan parquet @@ -750,228 +764,284 @@ Input [2]: [p_partkey#X, p_type#X] Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(146) Scan parquet +(146) Sort +Input [1]: [p_partkey#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(147) Scan parquet Output [5]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_partkey), IsNotNull(l_suppkey), IsNotNull(l_orderkey)] ReadSchema: struct -(147) Filter +(148) Filter Input [5]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Condition : ((isnotnull(l_partkey#X) AND isnotnull(l_suppkey#X)) AND isnotnull(l_orderkey#X)) -(148) Exchange +(149) Exchange Input [5]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(149) ShuffledHashJoin +(150) Sort +Input [5]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(151) SortMergeJoin Left keys [1]: [p_partkey#X] Right keys [1]: [l_partkey#X] Join condition: None -(150) Project +(152) Project Output [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Input [6]: [p_partkey#X, l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] -(151) Exchange +(153) Exchange Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(152) Scan parquet +(154) Sort +Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_suppkey#X ASC NULLS FIRST], false, 0 + +(155) Scan parquet Output [2]: [s_suppkey#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct -(153) Filter +(156) Filter Input [2]: [s_suppkey#X, s_nationkey#X] Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) -(154) Exchange +(157) Exchange Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(155) ShuffledHashJoin +(158) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(159) SortMergeJoin Left keys [1]: [l_suppkey#X] Right keys [1]: [s_suppkey#X] Join condition: None -(156) Project +(160) Project Output [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, s_nationkey#X] Input [6]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, s_suppkey#X, s_nationkey#X] -(157) Exchange +(161) Exchange Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, s_nationkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(158) Scan parquet +(162) Sort +Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, s_nationkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(163) Scan parquet Output [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderdate), GreaterThanOrEqual(o_orderdate,1995-01-01), LessThanOrEqual(o_orderdate,1996-12-31), IsNotNull(o_orderkey), IsNotNull(o_custkey)] ReadSchema: struct -(159) Filter +(164) Filter Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Condition : ((((isnotnull(o_orderdate#X) AND (o_orderdate#X >= 1995-01-01)) AND (o_orderdate#X <= 1996-12-31)) AND isnotnull(o_orderkey#X)) AND isnotnull(o_custkey#X)) -(160) Exchange +(165) Exchange Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(161) ShuffledHashJoin +(166) Sort +Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(167) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [o_orderkey#X] Join condition: None -(162) Project +(168) Project Output [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_custkey#X, o_orderdate#X] Input [7]: [l_orderkey#X, l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderkey#X, o_custkey#X, o_orderdate#X] -(163) Exchange +(169) Exchange Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_custkey#X, o_orderdate#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(164) Scan parquet +(170) Sort +Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_custkey#X, o_orderdate#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(171) Scan parquet Output [2]: [c_custkey#X, c_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(c_custkey), IsNotNull(c_nationkey)] ReadSchema: struct -(165) Filter +(172) Filter Input [2]: [c_custkey#X, c_nationkey#X] Condition : (isnotnull(c_custkey#X) AND isnotnull(c_nationkey#X)) -(166) Exchange +(173) Exchange Input [2]: [c_custkey#X, c_nationkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(167) ShuffledHashJoin +(174) Sort +Input [2]: [c_custkey#X, c_nationkey#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(175) SortMergeJoin Left keys [1]: [o_custkey#X] Right keys [1]: [c_custkey#X] Join condition: None -(168) Project +(176) Project Output [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, c_nationkey#X] Input [7]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_custkey#X, o_orderdate#X, c_custkey#X, c_nationkey#X] -(169) Exchange +(177) Exchange Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, c_nationkey#X] Arguments: hashpartitioning(c_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(170) Scan parquet +(178) Sort +Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, c_nationkey#X] +Arguments: [c_nationkey#X ASC NULLS FIRST], false, 0 + +(179) Scan parquet Output [2]: [n_nationkey#X, n_regionkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey), IsNotNull(n_regionkey)] ReadSchema: struct -(171) Filter +(180) Filter Input [2]: [n_nationkey#X, n_regionkey#X] Condition : (isnotnull(n_nationkey#X) AND isnotnull(n_regionkey#X)) -(172) Exchange +(181) Exchange Input [2]: [n_nationkey#X, n_regionkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(173) ShuffledHashJoin +(182) Sort +Input [2]: [n_nationkey#X, n_regionkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(183) SortMergeJoin Left keys [1]: [c_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(174) Project +(184) Project Output [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, n_regionkey#X] Input [7]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, c_nationkey#X, n_nationkey#X, n_regionkey#X] -(175) Exchange +(185) Exchange Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, n_regionkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(176) Scan parquet +(186) Sort +Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, n_regionkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(187) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey)] ReadSchema: struct -(177) Filter +(188) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : isnotnull(n_nationkey#X) -(178) Exchange +(189) Exchange Input [2]: [n_nationkey#X, n_name#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(179) ShuffledHashJoin +(190) Sort +Input [2]: [n_nationkey#X, n_name#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(191) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(180) Project +(192) Project Output [5]: [l_extendedprice#X, l_discount#X, o_orderdate#X, n_regionkey#X, n_name#X] Input [7]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, n_regionkey#X, n_nationkey#X, n_name#X] -(181) Exchange +(193) Exchange Input [5]: [l_extendedprice#X, l_discount#X, o_orderdate#X, n_regionkey#X, n_name#X] Arguments: hashpartitioning(n_regionkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(182) Scan parquet +(194) Sort +Input [5]: [l_extendedprice#X, l_discount#X, o_orderdate#X, n_regionkey#X, n_name#X] +Arguments: [n_regionkey#X ASC NULLS FIRST], false, 0 + +(195) Scan parquet Output [2]: [r_regionkey#X, r_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(r_name), EqualTo(r_name,AMERICA), IsNotNull(r_regionkey)] ReadSchema: struct -(183) Filter +(196) Filter Input [2]: [r_regionkey#X, r_name#X] Condition : ((isnotnull(r_name#X) AND (r_name#X = AMERICA)) AND isnotnull(r_regionkey#X)) -(184) Project +(197) Project Output [1]: [r_regionkey#X] Input [2]: [r_regionkey#X, r_name#X] -(185) Exchange +(198) Exchange Input [1]: [r_regionkey#X] Arguments: hashpartitioning(r_regionkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(186) ShuffledHashJoin +(199) Sort +Input [1]: [r_regionkey#X] +Arguments: [r_regionkey#X ASC NULLS FIRST], false, 0 + +(200) SortMergeJoin Left keys [1]: [n_regionkey#X] Right keys [1]: [r_regionkey#X] Join condition: None -(187) Project +(201) Project Output [3]: [year(o_orderdate#X) AS o_year#X, CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)) AS volume#X, n_name#X AS nation#X] Input [6]: [l_extendedprice#X, l_discount#X, o_orderdate#X, n_regionkey#X, n_name#X, r_regionkey#X] -(188) HashAggregate +(202) HashAggregate Input [3]: [o_year#X, volume#X, nation#X] Keys [1]: [o_year#X] Functions [2]: [partial_sum(CASE WHEN (nation#X = BRAZIL) THEN volume#X ELSE 0.0000 END), partial_sum(volume#X)] Aggregate Attributes [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] Results [5]: [o_year#X, sum#X, isEmpty#X, sum#X, isEmpty#X] -(189) Exchange +(203) Exchange Input [5]: [o_year#X, sum#X, isEmpty#X, sum#X, isEmpty#X] Arguments: hashpartitioning(o_year#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(190) HashAggregate +(204) HashAggregate Input [5]: [o_year#X, sum#X, isEmpty#X, sum#X, isEmpty#X] Keys [1]: [o_year#X] Functions [2]: [sum(CASE WHEN (nation#X = BRAZIL) THEN volume#X ELSE 0.0000 END), sum(volume#X)] Aggregate Attributes [2]: [sum(CASE WHEN (nation#X = BRAZIL) THEN volume#X ELSE 0.0000 END)#X, sum(volume#X)#X] Results [2]: [o_year#X, CheckOverflow((promote_precision(sum(CASE WHEN (nation#X = BRAZIL) THEN volume#X ELSE 0.0000 END)#X) / promote_precision(sum(volume#X)#X)), DecimalType(38,6)) AS mkt_share#X] -(191) Exchange +(205) Exchange Input [2]: [o_year#X, mkt_share#X] Arguments: rangepartitioning(o_year#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(192) Sort +(206) Sort Input [2]: [o_year#X, mkt_share#X] Arguments: [o_year#X ASC NULLS FIRST], true, 0 -(193) AdaptiveSparkPlan +(207) AdaptiveSparkPlan Output [2]: [o_year#X, mkt_share#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/9.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/9.txt index 0bad5c20cf05..15fbf97a77f3 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/9.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark33/9.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (145) +AdaptiveSparkPlan (155) +- == Final Plan == VeloxColumnarToRowExec (106) +- ^ SortExecTransformer (104) @@ -83,44 +83,54 @@ AdaptiveSparkPlan (145) +- ^ FilterExecTransformer (79) +- ^ Scan parquet (78) +- == Initial Plan == - Sort (144) - +- Exchange (143) - +- HashAggregate (142) - +- Exchange (141) - +- HashAggregate (140) - +- Project (139) - +- ShuffledHashJoin Inner BuildRight (138) - :- Exchange (134) - : +- Project (133) - : +- ShuffledHashJoin Inner BuildRight (132) - : :- Exchange (128) - : : +- Project (127) - : : +- ShuffledHashJoin Inner BuildRight (126) - : : :- Exchange (122) - : : : +- Project (121) - : : : +- ShuffledHashJoin Inner BuildRight (120) - : : : :- Exchange (116) - : : : : +- Project (115) - : : : : +- ShuffledHashJoin Inner BuildLeft (114) - : : : : :- Exchange (110) - : : : : : +- Project (109) - : : : : : +- Filter (108) - : : : : : +- Scan parquet (107) - : : : : +- Exchange (113) - : : : : +- Filter (112) - : : : : +- Scan parquet (111) - : : : +- Exchange (119) - : : : +- Filter (118) - : : : +- Scan parquet (117) - : : +- Exchange (125) - : : +- Filter (124) - : : +- Scan parquet (123) - : +- Exchange (131) - : +- Filter (130) - : +- Scan parquet (129) - +- Exchange (137) - +- Filter (136) - +- Scan parquet (135) + Sort (154) + +- Exchange (153) + +- HashAggregate (152) + +- Exchange (151) + +- HashAggregate (150) + +- Project (149) + +- SortMergeJoin Inner (148) + :- Sort (143) + : +- Exchange (142) + : +- Project (141) + : +- SortMergeJoin Inner (140) + : :- Sort (135) + : : +- Exchange (134) + : : +- Project (133) + : : +- SortMergeJoin Inner (132) + : : :- Sort (127) + : : : +- Exchange (126) + : : : +- Project (125) + : : : +- SortMergeJoin Inner (124) + : : : :- Sort (119) + : : : : +- Exchange (118) + : : : : +- Project (117) + : : : : +- SortMergeJoin Inner (116) + : : : : :- Sort (111) + : : : : : +- Exchange (110) + : : : : : +- Project (109) + : : : : : +- Filter (108) + : : : : : +- Scan parquet (107) + : : : : +- Sort (115) + : : : : +- Exchange (114) + : : : : +- Filter (113) + : : : : +- Scan parquet (112) + : : : +- Sort (123) + : : : +- Exchange (122) + : : : +- Filter (121) + : : : +- Scan parquet (120) + : : +- Sort (131) + : : +- Exchange (130) + : : +- Filter (129) + : : +- Scan parquet (128) + : +- Sort (139) + : +- Exchange (138) + : +- Filter (137) + : +- Scan parquet (136) + +- Sort (147) + +- Exchange (146) + +- Filter (145) + +- Scan parquet (144) (1) Scan parquet @@ -570,168 +580,208 @@ Input [2]: [p_partkey#X, p_name#X] Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(111) Scan parquet +(111) Sort +Input [1]: [p_partkey#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(112) Scan parquet Output [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_partkey), IsNotNull(l_suppkey), IsNotNull(l_orderkey)] ReadSchema: struct -(112) Filter +(113) Filter Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Condition : ((isnotnull(l_partkey#X) AND isnotnull(l_suppkey#X)) AND isnotnull(l_orderkey#X)) -(113) Exchange +(114) Exchange Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(114) ShuffledHashJoin +(115) Sort +Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(116) SortMergeJoin Left keys [1]: [p_partkey#X] Right keys [1]: [l_partkey#X] Join condition: None -(115) Project +(117) Project Output [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Input [7]: [p_partkey#X, l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] -(116) Exchange +(118) Exchange Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(117) Scan parquet +(119) Sort +Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] +Arguments: [l_suppkey#X ASC NULLS FIRST], false, 0 + +(120) Scan parquet Output [2]: [s_suppkey#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct -(118) Filter +(121) Filter Input [2]: [s_suppkey#X, s_nationkey#X] Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) -(119) Exchange +(122) Exchange Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(120) ShuffledHashJoin +(123) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(124) SortMergeJoin Left keys [1]: [l_suppkey#X] Right keys [1]: [s_suppkey#X] Join condition: None -(121) Project +(125) Project Output [7]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X] Input [8]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_suppkey#X, s_nationkey#X] -(122) Exchange +(126) Exchange Input [7]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X] Arguments: hashpartitioning(l_suppkey#X, l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(123) Scan parquet +(127) Sort +Input [7]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X] +Arguments: [l_suppkey#X ASC NULLS FIRST, l_partkey#X ASC NULLS FIRST], false, 0 + +(128) Scan parquet Output [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(ps_suppkey), IsNotNull(ps_partkey)] ReadSchema: struct -(124) Filter +(129) Filter Input [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] Condition : (isnotnull(ps_suppkey#X) AND isnotnull(ps_partkey#X)) -(125) Exchange +(130) Exchange Input [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] Arguments: hashpartitioning(ps_suppkey#X, ps_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(126) ShuffledHashJoin +(131) Sort +Input [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] +Arguments: [ps_suppkey#X ASC NULLS FIRST, ps_partkey#X ASC NULLS FIRST], false, 0 + +(132) SortMergeJoin Left keys [2]: [l_suppkey#X, l_partkey#X] Right keys [2]: [ps_suppkey#X, ps_partkey#X] Join condition: None -(127) Project +(133) Project Output [6]: [l_orderkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X] Input [10]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_partkey#X, ps_suppkey#X, ps_supplycost#X] -(128) Exchange +(134) Exchange Input [6]: [l_orderkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(129) Scan parquet +(135) Sort +Input [6]: [l_orderkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(136) Scan parquet Output [2]: [o_orderkey#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderkey)] ReadSchema: struct -(130) Filter +(137) Filter Input [2]: [o_orderkey#X, o_orderdate#X] Condition : isnotnull(o_orderkey#X) -(131) Exchange +(138) Exchange Input [2]: [o_orderkey#X, o_orderdate#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(132) ShuffledHashJoin +(139) Sort +Input [2]: [o_orderkey#X, o_orderdate#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(140) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [o_orderkey#X] Join condition: None -(133) Project +(141) Project Output [6]: [l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderdate#X] Input [8]: [l_orderkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderkey#X, o_orderdate#X] -(134) Exchange +(142) Exchange Input [6]: [l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderdate#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(135) Scan parquet +(143) Sort +Input [6]: [l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderdate#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(144) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey)] ReadSchema: struct -(136) Filter +(145) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : isnotnull(n_nationkey#X) -(137) Exchange +(146) Exchange Input [2]: [n_nationkey#X, n_name#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(138) ShuffledHashJoin +(147) Sort +Input [2]: [n_nationkey#X, n_name#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(148) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join condition: None -(139) Project +(149) Project Output [3]: [n_name#X AS nation#X, year(o_orderdate#X) AS o_year#X, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(l_extendedprice#X as decimal(13,2))) * promote_precision(CheckOverflow((1.00 - promote_precision(cast(l_discount#X as decimal(13,2)))), DecimalType(13,2)))), DecimalType(26,4)) as decimal(27,4))) - promote_precision(cast(CheckOverflow((promote_precision(ps_supplycost#X) * promote_precision(l_quantity#X)), DecimalType(25,4)) as decimal(27,4)))), DecimalType(27,4)) AS amount#X] Input [8]: [l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderdate#X, n_nationkey#X, n_name#X] -(140) HashAggregate +(150) HashAggregate Input [3]: [nation#X, o_year#X, amount#X] Keys [2]: [nation#X, o_year#X] Functions [1]: [partial_sum(amount#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [4]: [nation#X, o_year#X, sum#X, isEmpty#X] -(141) Exchange +(151) Exchange Input [4]: [nation#X, o_year#X, sum#X, isEmpty#X] Arguments: hashpartitioning(nation#X, o_year#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(142) HashAggregate +(152) HashAggregate Input [4]: [nation#X, o_year#X, sum#X, isEmpty#X] Keys [2]: [nation#X, o_year#X] Functions [1]: [sum(amount#X)] Aggregate Attributes [1]: [sum(amount#X)#X] Results [3]: [nation#X, o_year#X, sum(amount#X)#X AS sum_profit#X] -(143) Exchange +(153) Exchange Input [3]: [nation#X, o_year#X, sum_profit#X] Arguments: rangepartitioning(nation#X ASC NULLS FIRST, o_year#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(144) Sort +(154) Sort Input [3]: [nation#X, o_year#X, sum_profit#X] Arguments: [nation#X ASC NULLS FIRST, o_year#X DESC NULLS LAST], true, 0 -(145) AdaptiveSparkPlan +(155) AdaptiveSparkPlan Output [3]: [nation#X, o_year#X, sum_profit#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/10.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/10.txt index 5be72ee42483..d7376c740f93 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/10.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/10.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (94) +AdaptiveSparkPlan (100) +- == Final Plan == VeloxColumnarToRowExec (67) +- TakeOrderedAndProjectExecTransformer (66) @@ -54,32 +54,38 @@ AdaptiveSparkPlan (94) +- ^ FilterExecTransformer (45) +- ^ Scan parquet (44) +- == Initial Plan == - TakeOrderedAndProject (93) - +- HashAggregate (92) - +- Exchange (91) - +- HashAggregate (90) - +- Project (89) - +- ShuffledHashJoin Inner BuildRight (88) - :- Exchange (84) - : +- Project (83) - : +- ShuffledHashJoin Inner BuildRight (82) - : :- Exchange (77) - : : +- Project (76) - : : +- ShuffledHashJoin Inner BuildRight (75) - : : :- Exchange (70) - : : : +- Filter (69) - : : : +- Scan parquet (68) - : : +- Exchange (74) - : : +- Project (73) - : : +- Filter (72) - : : +- Scan parquet (71) - : +- Exchange (81) - : +- Project (80) - : +- Filter (79) - : +- Scan parquet (78) - +- Exchange (87) - +- Filter (86) - +- Scan parquet (85) + TakeOrderedAndProject (99) + +- HashAggregate (98) + +- Exchange (97) + +- HashAggregate (96) + +- Project (95) + +- SortMergeJoin Inner (94) + :- Sort (89) + : +- Exchange (88) + : +- Project (87) + : +- SortMergeJoin Inner (86) + : :- Sort (80) + : : +- Exchange (79) + : : +- Project (78) + : : +- SortMergeJoin Inner (77) + : : :- Sort (71) + : : : +- Exchange (70) + : : : +- Filter (69) + : : : +- Scan parquet (68) + : : +- Sort (76) + : : +- Exchange (75) + : : +- Project (74) + : : +- Filter (73) + : : +- Scan parquet (72) + : +- Sort (85) + : +- Exchange (84) + : +- Project (83) + : +- Filter (82) + : +- Scan parquet (81) + +- Sort (93) + +- Exchange (92) + +- Filter (91) + +- Scan parquet (90) (1) Scan parquet @@ -374,119 +380,143 @@ Condition : (isnotnull(c_custkey#X) AND isnotnull(c_nationkey#X)) Input [7]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(71) Scan parquet +(71) Sort +Input [7]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(72) Scan parquet Output [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderdate), GreaterThanOrEqual(o_orderdate,1993-10-01), LessThan(o_orderdate,1994-01-01), IsNotNull(o_custkey), IsNotNull(o_orderkey)] ReadSchema: struct -(72) Filter +(73) Filter Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Condition : ((((isnotnull(o_orderdate#X) AND (o_orderdate#X >= 1993-10-01)) AND (o_orderdate#X < 1994-01-01)) AND isnotnull(o_custkey#X)) AND isnotnull(o_orderkey#X)) -(73) Project +(74) Project Output [2]: [o_orderkey#X, o_custkey#X] Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] -(74) Exchange +(75) Exchange Input [2]: [o_orderkey#X, o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(75) ShuffledHashJoin +(76) Sort +Input [2]: [o_orderkey#X, o_custkey#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(77) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join type: Inner Join condition: None -(76) Project +(78) Project Output [8]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X] Input [9]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X, o_custkey#X] -(77) Exchange +(79) Exchange Input [8]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(78) Scan parquet +(80) Sort +Input [8]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(81) Scan parquet Output [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_returnflag#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_returnflag), EqualTo(l_returnflag,R), IsNotNull(l_orderkey)] ReadSchema: struct -(79) Filter +(82) Filter Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_returnflag#X] Condition : ((isnotnull(l_returnflag#X) AND (l_returnflag#X = R)) AND isnotnull(l_orderkey#X)) -(80) Project +(83) Project Output [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_returnflag#X] -(81) Exchange +(84) Exchange Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(82) ShuffledHashJoin +(85) Sort +Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(86) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join type: Inner Join condition: None -(83) Project +(87) Project Output [9]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X] Input [11]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, o_orderkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X] -(84) Exchange +(88) Exchange Input [9]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(c_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(85) Scan parquet +(89) Sort +Input [9]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X] +Arguments: [c_nationkey#X ASC NULLS FIRST], false, 0 + +(90) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey)] ReadSchema: struct -(86) Filter +(91) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : isnotnull(n_nationkey#X) -(87) Exchange +(92) Exchange Input [2]: [n_nationkey#X, n_name#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(88) ShuffledHashJoin +(93) Sort +Input [2]: [n_nationkey#X, n_name#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(94) SortMergeJoin Left keys [1]: [c_nationkey#X] Right keys [1]: [n_nationkey#X] Join type: Inner Join condition: None -(89) Project +(95) Project Output [9]: [c_custkey#X, c_name#X, c_address#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X, n_name#X] Input [11]: [c_custkey#X, c_name#X, c_address#X, c_nationkey#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X, n_nationkey#X, n_name#X] -(90) HashAggregate +(96) HashAggregate Input [9]: [c_custkey#X, c_name#X, c_address#X, c_phone#X, c_acctbal#X, c_comment#X, l_extendedprice#X, l_discount#X, n_name#X] Keys [7]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X] Functions [1]: [partial_sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [9]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, sum#X, isEmpty#X] -(91) Exchange +(97) Exchange Input [9]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, sum#X, isEmpty#X] Arguments: hashpartitioning(c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(92) HashAggregate +(98) HashAggregate Input [9]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X, sum#X, isEmpty#X] Keys [7]: [c_custkey#X, c_name#X, c_acctbal#X, c_phone#X, n_name#X, c_address#X, c_comment#X] Functions [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X] Results [8]: [c_custkey#X, c_name#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X AS revenue#X, c_acctbal#X, n_name#X, c_address#X, c_phone#X, c_comment#X] -(93) TakeOrderedAndProject +(99) TakeOrderedAndProject Input [8]: [c_custkey#X, c_name#X, revenue#X, c_acctbal#X, n_name#X, c_address#X, c_phone#X, c_comment#X] Arguments: X, [revenue#X DESC NULLS LAST], [c_custkey#X, c_name#X, revenue#X, c_acctbal#X, n_name#X, c_address#X, c_phone#X, c_comment#X] -(94) AdaptiveSparkPlan +(100) AdaptiveSparkPlan Output [8]: [c_custkey#X, c_name#X, revenue#X, c_acctbal#X, n_name#X, c_address#X, c_phone#X, c_comment#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/11.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/11.txt index d9dbbfe0dbe9..c9371ffbf2c3 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/11.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/11.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (78) +AdaptiveSparkPlan (82) +- == Final Plan == VeloxColumnarToRowExec (56) +- ^ SortExecTransformer (54) @@ -45,27 +45,31 @@ AdaptiveSparkPlan (78) +- ^ FilterExecTransformer (28) +- ^ Scan parquet (27) +- == Initial Plan == - Sort (77) - +- Exchange (76) - +- Filter (75) - +- HashAggregate (74) - +- Exchange (73) - +- HashAggregate (72) - +- Project (71) - +- ShuffledHashJoin Inner BuildRight (70) - :- Exchange (65) - : +- Project (64) - : +- ShuffledHashJoin Inner BuildRight (63) - : :- Exchange (59) - : : +- Filter (58) - : : +- Scan parquet (57) - : +- Exchange (62) - : +- Filter (61) - : +- Scan parquet (60) - +- Exchange (69) - +- Project (68) - +- Filter (67) - +- Scan parquet (66) + Sort (81) + +- Exchange (80) + +- Filter (79) + +- HashAggregate (78) + +- Exchange (77) + +- HashAggregate (76) + +- Project (75) + +- SortMergeJoin Inner (74) + :- Sort (68) + : +- Exchange (67) + : +- Project (66) + : +- SortMergeJoin Inner (65) + : :- Sort (60) + : : +- Exchange (59) + : : +- Filter (58) + : : +- Scan parquet (57) + : +- Sort (64) + : +- Exchange (63) + : +- Filter (62) + : +- Scan parquet (61) + +- Sort (73) + +- Exchange (72) + +- Project (71) + +- Filter (70) + +- Scan parquet (69) (1) Scan parquet @@ -313,365 +317,401 @@ Condition : isnotnull(ps_suppkey#X) Input [4]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(60) Scan parquet +(60) Sort +Input [4]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] +Arguments: [ps_suppkey#X ASC NULLS FIRST], false, 0 + +(61) Scan parquet Output [2]: [s_suppkey#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct -(61) Filter +(62) Filter Input [2]: [s_suppkey#X, s_nationkey#X] Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) -(62) Exchange +(63) Exchange Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(63) ShuffledHashJoin +(64) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(65) SortMergeJoin Left keys [1]: [ps_suppkey#X] Right keys [1]: [s_suppkey#X] Join type: Inner Join condition: None -(64) Project +(66) Project Output [4]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] Input [6]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X, s_suppkey#X, s_nationkey#X] -(65) Exchange +(67) Exchange Input [4]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(66) Scan parquet +(68) Sort +Input [4]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(69) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,GERMANY), IsNotNull(n_nationkey)] ReadSchema: struct -(67) Filter +(70) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = GERMANY)) AND isnotnull(n_nationkey#X)) -(68) Project +(71) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(69) Exchange +(72) Exchange Input [1]: [n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(70) ShuffledHashJoin +(73) Sort +Input [1]: [n_nationkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(74) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join type: Inner Join condition: None -(71) Project +(75) Project Output [3]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X] Input [5]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X, n_nationkey#X] -(72) HashAggregate +(76) HashAggregate Input [3]: [ps_partkey#X, ps_availqty#X, ps_supplycost#X] Keys [1]: [ps_partkey#X] Functions [1]: [partial_sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [ps_partkey#X, sum#X, isEmpty#X] -(73) Exchange +(77) Exchange Input [3]: [ps_partkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(ps_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(74) HashAggregate +(78) HashAggregate Input [3]: [ps_partkey#X, sum#X, isEmpty#X] Keys [1]: [ps_partkey#X] Functions [1]: [sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))] Aggregate Attributes [1]: [sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))#X] Results [2]: [ps_partkey#X, sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))#X AS value#X] -(75) Filter +(79) Filter Input [2]: [ps_partkey#X, value#X] Condition : (isnotnull(value#X) AND (cast(value#X as decimal(38,6)) > Subquery subquery#X, [id=#X])) -(76) Exchange +(80) Exchange Input [2]: [ps_partkey#X, value#X] Arguments: rangepartitioning(value#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(77) Sort +(81) Sort Input [2]: [ps_partkey#X, value#X] Arguments: [value#X DESC NULLS LAST], true, 0 -(78) AdaptiveSparkPlan +(82) AdaptiveSparkPlan Output [2]: [ps_partkey#X, value#X] Arguments: isFinalPlan=true ===== Subqueries ===== Subquery:1 Hosting operator id = 47 Hosting Expression = Subquery subquery#X, [id=#X] -AdaptiveSparkPlan (128) +AdaptiveSparkPlan (136) +- == Final Plan == - VeloxColumnarToRowExec (110) - +- ^ ProjectExecTransformer (108) - +- ^ RegularHashAggregateExecTransformer (107) - +- ^ RegularHashAggregateExecTransformer (106) - +- ^ ProjectExecTransformer (105) - +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (104) - :- ^ InputIteratorTransformer (99) - : +- ShuffleQueryStage (97), Statistics(X) - : +- ColumnarExchange (96) - : +- VeloxAppendBatches (95) - : +- ^ ProjectExecTransformer (93) - : +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (92) - : :- ^ InputIteratorTransformer (87) - : : +- ShuffleQueryStage (85), Statistics(X) - : : +- ColumnarExchange (84) - : : +- VeloxAppendBatches (83) - : : +- ^ ProjectExecTransformer (81) - : : +- ^ FilterExecTransformer (80) - : : +- ^ Scan parquet (79) - : +- ^ InputIteratorTransformer (91) - : +- ShuffleQueryStage (89), Statistics(X) - : +- ReusedExchange (88) - +- ^ InputIteratorTransformer (103) - +- ShuffleQueryStage (101), Statistics(X) - +- ReusedExchange (100) + VeloxColumnarToRowExec (114) + +- ^ ProjectExecTransformer (112) + +- ^ RegularHashAggregateExecTransformer (111) + +- ^ RegularHashAggregateExecTransformer (110) + +- ^ ProjectExecTransformer (109) + +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (108) + :- ^ InputIteratorTransformer (103) + : +- ShuffleQueryStage (101), Statistics(X) + : +- ColumnarExchange (100) + : +- VeloxAppendBatches (99) + : +- ^ ProjectExecTransformer (97) + : +- ^ ShuffledHashJoinExecTransformer Inner BuildRight (96) + : :- ^ InputIteratorTransformer (91) + : : +- ShuffleQueryStage (89), Statistics(X) + : : +- ColumnarExchange (88) + : : +- VeloxAppendBatches (87) + : : +- ^ ProjectExecTransformer (85) + : : +- ^ FilterExecTransformer (84) + : : +- ^ Scan parquet (83) + : +- ^ InputIteratorTransformer (95) + : +- ShuffleQueryStage (93), Statistics(X) + : +- ReusedExchange (92) + +- ^ InputIteratorTransformer (107) + +- ShuffleQueryStage (105), Statistics(X) + +- ReusedExchange (104) +- == Initial Plan == - HashAggregate (127) - +- HashAggregate (126) - +- Project (125) - +- ShuffledHashJoin Inner BuildRight (124) - :- Exchange (119) - : +- Project (118) - : +- ShuffledHashJoin Inner BuildRight (117) - : :- Exchange (113) - : : +- Filter (112) - : : +- Scan parquet (111) - : +- Exchange (116) - : +- Filter (115) - : +- Scan parquet (114) - +- Exchange (123) - +- Project (122) - +- Filter (121) - +- Scan parquet (120) - - -(79) Scan parquet + HashAggregate (135) + +- HashAggregate (134) + +- Project (133) + +- SortMergeJoin Inner (132) + :- Sort (126) + : +- Exchange (125) + : +- Project (124) + : +- SortMergeJoin Inner (123) + : :- Sort (118) + : : +- Exchange (117) + : : +- Filter (116) + : : +- Scan parquet (115) + : +- Sort (122) + : +- Exchange (121) + : +- Filter (120) + : +- Scan parquet (119) + +- Sort (131) + +- Exchange (130) + +- Project (129) + +- Filter (128) + +- Scan parquet (127) + + +(83) Scan parquet Output [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(ps_suppkey)] ReadSchema: struct -(80) FilterExecTransformer +(84) FilterExecTransformer Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: isnotnull(ps_suppkey#X) -(81) ProjectExecTransformer +(85) ProjectExecTransformer Output [4]: [hash(ps_suppkey#X, 42) AS hash_partition_key#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] -(82) WholeStageCodegenTransformer (X) +(86) WholeStageCodegenTransformer (X) Input [4]: [hash_partition_key#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: false -(83) VeloxAppendBatches +(87) VeloxAppendBatches Input [4]: [hash_partition_key#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: X -(84) ColumnarExchange +(88) ColumnarExchange Input [4]: [hash_partition_key#X, ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [ps_suppkey#X, ps_availqty#X, ps_supplycost#X], [plan_id=X], [id=#X] -(85) ShuffleQueryStage +(89) ShuffleQueryStage Output [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: X -(86) InputAdapter +(90) InputAdapter Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] -(87) InputIteratorTransformer +(91) InputIteratorTransformer Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] -(88) ReusedExchange [Reuses operator id: 15] +(92) ReusedExchange [Reuses operator id: 15] Output [2]: [s_suppkey#X, s_nationkey#X] -(89) ShuffleQueryStage +(93) ShuffleQueryStage Output [2]: [s_suppkey#X, s_nationkey#X] Arguments: X -(90) InputAdapter +(94) InputAdapter Input [2]: [s_suppkey#X, s_nationkey#X] -(91) InputIteratorTransformer +(95) InputIteratorTransformer Input [2]: [s_suppkey#X, s_nationkey#X] -(92) ShuffledHashJoinExecTransformer +(96) ShuffledHashJoinExecTransformer Left keys [1]: [ps_suppkey#X] Right keys [1]: [s_suppkey#X] Join type: Inner Join condition: None -(93) ProjectExecTransformer +(97) ProjectExecTransformer Output [4]: [hash(s_nationkey#X, 42) AS hash_partition_key#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] Input [5]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X, s_suppkey#X, s_nationkey#X] -(94) WholeStageCodegenTransformer (X) +(98) WholeStageCodegenTransformer (X) Input [4]: [hash_partition_key#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] Arguments: false -(95) VeloxAppendBatches +(99) VeloxAppendBatches Input [4]: [hash_partition_key#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] Arguments: X -(96) ColumnarExchange +(100) ColumnarExchange Input [4]: [hash_partition_key#X, ps_availqty#X, ps_supplycost#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [ps_availqty#X, ps_supplycost#X, s_nationkey#X], [plan_id=X], [id=#X] -(97) ShuffleQueryStage +(101) ShuffleQueryStage Output [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] Arguments: X -(98) InputAdapter +(102) InputAdapter Input [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] -(99) InputIteratorTransformer +(103) InputIteratorTransformer Input [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] -(100) ReusedExchange [Reuses operator id: 32] +(104) ReusedExchange [Reuses operator id: 32] Output [1]: [n_nationkey#X] -(101) ShuffleQueryStage +(105) ShuffleQueryStage Output [1]: [n_nationkey#X] Arguments: X -(102) InputAdapter +(106) InputAdapter Input [1]: [n_nationkey#X] -(103) InputIteratorTransformer +(107) InputIteratorTransformer Input [1]: [n_nationkey#X] -(104) ShuffledHashJoinExecTransformer +(108) ShuffledHashJoinExecTransformer Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join type: Inner Join condition: None -(105) ProjectExecTransformer +(109) ProjectExecTransformer Output [3]: [ps_availqty#X, ps_supplycost#X, (ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))) AS _pre_X#X] Input [4]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X, n_nationkey#X] -(106) RegularHashAggregateExecTransformer +(110) RegularHashAggregateExecTransformer Input [3]: [ps_availqty#X, ps_supplycost#X, _pre_X#X] Keys: [] Functions [1]: [partial_sum(_pre_X#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [2]: [sum#X, isEmpty#X] -(107) RegularHashAggregateExecTransformer +(111) RegularHashAggregateExecTransformer Input [2]: [sum#X, isEmpty#X] Keys: [] Functions [1]: [sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))] Aggregate Attributes [1]: [sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))#X] Results [1]: [sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))#X] -(108) ProjectExecTransformer +(112) ProjectExecTransformer Output [1]: [(sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))#X * 0.0001000000) AS (sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] Input [1]: [sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))#X] -(109) WholeStageCodegenTransformer (X) +(113) WholeStageCodegenTransformer (X) Input [1]: [(sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] Arguments: false -(110) VeloxColumnarToRowExec +(114) VeloxColumnarToRowExec Input [1]: [(sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] -(111) Scan parquet +(115) Scan parquet Output [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(ps_suppkey)] ReadSchema: struct -(112) Filter +(116) Filter Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Condition : isnotnull(ps_suppkey#X) -(113) Exchange +(117) Exchange Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(114) Scan parquet +(118) Sort +Input [3]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X] +Arguments: [ps_suppkey#X ASC NULLS FIRST], false, 0 + +(119) Scan parquet Output [2]: [s_suppkey#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct -(115) Filter +(120) Filter Input [2]: [s_suppkey#X, s_nationkey#X] Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) -(116) Exchange +(121) Exchange Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(117) ShuffledHashJoin +(122) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(123) SortMergeJoin Left keys [1]: [ps_suppkey#X] Right keys [1]: [s_suppkey#X] Join type: Inner Join condition: None -(118) Project +(124) Project Output [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] Input [5]: [ps_suppkey#X, ps_availqty#X, ps_supplycost#X, s_suppkey#X, s_nationkey#X] -(119) Exchange +(125) Exchange Input [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(120) Scan parquet +(126) Sort +Input [3]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(127) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,GERMANY), IsNotNull(n_nationkey)] ReadSchema: struct -(121) Filter +(128) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = GERMANY)) AND isnotnull(n_nationkey#X)) -(122) Project +(129) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(123) Exchange +(130) Exchange Input [1]: [n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(124) ShuffledHashJoin +(131) Sort +Input [1]: [n_nationkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(132) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join type: Inner Join condition: None -(125) Project +(133) Project Output [2]: [ps_availqty#X, ps_supplycost#X] Input [4]: [ps_availqty#X, ps_supplycost#X, s_nationkey#X, n_nationkey#X] -(126) HashAggregate +(134) HashAggregate Input [2]: [ps_availqty#X, ps_supplycost#X] Keys: [] Functions [1]: [partial_sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [2]: [sum#X, isEmpty#X] -(127) HashAggregate +(135) HashAggregate Input [2]: [sum#X, isEmpty#X] Keys: [] Functions [1]: [sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))] Aggregate Attributes [1]: [sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))#X] Results [1]: [(sum((ps_supplycost#X * cast(ps_availqty#X as decimal(10,0))))#X * 0.0001000000) AS (sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] -(128) AdaptiveSparkPlan +(136) AdaptiveSparkPlan Output [1]: [(sum((ps_supplycost * ps_availqty)) * 0.0001000000)#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/12.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/12.txt index 63c356d6d1bf..ce033f5468d1 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/12.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/12.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (53) +AdaptiveSparkPlan (55) +- == Final Plan == VeloxColumnarToRowExec (38) +- ^ SortExecTransformer (36) @@ -31,20 +31,22 @@ AdaptiveSparkPlan (53) +- ^ FilterExecTransformer (11) +- ^ Scan parquet (10) +- == Initial Plan == - Sort (52) - +- Exchange (51) - +- HashAggregate (50) - +- Exchange (49) - +- HashAggregate (48) - +- Project (47) - +- ShuffledHashJoin Inner BuildLeft (46) - :- Exchange (41) - : +- Filter (40) - : +- Scan parquet (39) - +- Exchange (45) - +- Project (44) - +- Filter (43) - +- Scan parquet (42) + Sort (54) + +- Exchange (53) + +- HashAggregate (52) + +- Exchange (51) + +- HashAggregate (50) + +- Project (49) + +- SortMergeJoin Inner (48) + :- Sort (42) + : +- Exchange (41) + : +- Filter (40) + : +- Scan parquet (39) + +- Sort (47) + +- Exchange (46) + +- Project (45) + +- Filter (44) + +- Scan parquet (43) (1) Scan parquet @@ -219,61 +221,69 @@ Condition : isnotnull(o_orderkey#X) Input [2]: [o_orderkey#X, o_orderpriority#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(42) Scan parquet +(42) Sort +Input [2]: [o_orderkey#X, o_orderpriority#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(43) Scan parquet Output [5]: [l_orderkey#X, l_shipdate#X, l_commitdate#X, l_receiptdate#X, l_shipmode#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_commitdate), IsNotNull(l_receiptdate), IsNotNull(l_shipdate), In(l_shipmode, [MAIL,SHIP]), GreaterThanOrEqual(l_receiptdate,1994-01-01), LessThan(l_receiptdate,1995-01-01), IsNotNull(l_orderkey)] ReadSchema: struct -(43) Filter +(44) Filter Input [5]: [l_orderkey#X, l_shipdate#X, l_commitdate#X, l_receiptdate#X, l_shipmode#X] Condition : ((((((((isnotnull(l_commitdate#X) AND isnotnull(l_receiptdate#X)) AND isnotnull(l_shipdate#X)) AND l_shipmode#X IN (MAIL,SHIP)) AND (l_commitdate#X < l_receiptdate#X)) AND (l_shipdate#X < l_commitdate#X)) AND (l_receiptdate#X >= 1994-01-01)) AND (l_receiptdate#X < 1995-01-01)) AND isnotnull(l_orderkey#X)) -(44) Project +(45) Project Output [2]: [l_orderkey#X, l_shipmode#X] Input [5]: [l_orderkey#X, l_shipdate#X, l_commitdate#X, l_receiptdate#X, l_shipmode#X] -(45) Exchange +(46) Exchange Input [2]: [l_orderkey#X, l_shipmode#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(46) ShuffledHashJoin +(47) Sort +Input [2]: [l_orderkey#X, l_shipmode#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(48) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join type: Inner Join condition: None -(47) Project +(49) Project Output [2]: [o_orderpriority#X, l_shipmode#X] Input [4]: [o_orderkey#X, o_orderpriority#X, l_orderkey#X, l_shipmode#X] -(48) HashAggregate +(50) HashAggregate Input [2]: [o_orderpriority#X, l_shipmode#X] Keys [1]: [l_shipmode#X] Functions [2]: [partial_sum(CASE WHEN ((o_orderpriority#X = 1-URGENT) OR (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (NOT (o_orderpriority#X = 1-URGENT) AND NOT (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)] Aggregate Attributes [2]: [sum#X, sum#X] Results [3]: [l_shipmode#X, sum#X, sum#X] -(49) Exchange +(51) Exchange Input [3]: [l_shipmode#X, sum#X, sum#X] Arguments: hashpartitioning(l_shipmode#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(50) HashAggregate +(52) HashAggregate Input [3]: [l_shipmode#X, sum#X, sum#X] Keys [1]: [l_shipmode#X] Functions [2]: [sum(CASE WHEN ((o_orderpriority#X = 1-URGENT) OR (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END), sum(CASE WHEN (NOT (o_orderpriority#X = 1-URGENT) AND NOT (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)] Aggregate Attributes [2]: [sum(CASE WHEN ((o_orderpriority#X = 1-URGENT) OR (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)#X, sum(CASE WHEN (NOT (o_orderpriority#X = 1-URGENT) AND NOT (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)#X] Results [3]: [l_shipmode#X, sum(CASE WHEN ((o_orderpriority#X = 1-URGENT) OR (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)#X AS high_line_count#X, sum(CASE WHEN (NOT (o_orderpriority#X = 1-URGENT) AND NOT (o_orderpriority#X = 2-HIGH)) THEN 1 ELSE 0 END)#X AS low_line_count#X] -(51) Exchange +(53) Exchange Input [3]: [l_shipmode#X, high_line_count#X, low_line_count#X] Arguments: rangepartitioning(l_shipmode#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(52) Sort +(54) Sort Input [3]: [l_shipmode#X, high_line_count#X, low_line_count#X] Arguments: [l_shipmode#X ASC NULLS FIRST], true, 0 -(53) AdaptiveSparkPlan +(55) AdaptiveSparkPlan Output [3]: [l_shipmode#X, high_line_count#X, low_line_count#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/13.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/13.txt index 812a7be868b6..c71d03b93e12 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/13.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/13.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (56) +AdaptiveSparkPlan (58) +- == Final Plan == VeloxColumnarToRowExec (40) +- ^ SortExecTransformer (38) @@ -33,21 +33,23 @@ AdaptiveSparkPlan (56) +- ^ FilterExecTransformer (10) +- ^ Scan parquet (9) +- == Initial Plan == - Sort (55) - +- Exchange (54) - +- HashAggregate (53) - +- Exchange (52) - +- HashAggregate (51) - +- HashAggregate (50) - +- HashAggregate (49) - +- Project (48) - +- ShuffledHashJoin LeftOuter BuildRight (47) - :- Exchange (42) - : +- Scan parquet (41) - +- Exchange (46) - +- Project (45) - +- Filter (44) - +- Scan parquet (43) + Sort (57) + +- Exchange (56) + +- HashAggregate (55) + +- Exchange (54) + +- HashAggregate (53) + +- HashAggregate (52) + +- HashAggregate (51) + +- Project (50) + +- SortMergeJoin LeftOuter (49) + :- Sort (43) + : +- Exchange (42) + : +- Scan parquet (41) + +- Sort (48) + +- Exchange (47) + +- Project (46) + +- Filter (45) + +- Scan parquet (44) (1) Scan parquet @@ -230,75 +232,83 @@ ReadSchema: struct Input [1]: [c_custkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(43) Scan parquet +(43) Sort +Input [1]: [c_custkey#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(44) Scan parquet Output [3]: [o_orderkey#X, o_custkey#X, o_comment#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_comment), IsNotNull(o_custkey)] ReadSchema: struct -(44) Filter +(45) Filter Input [3]: [o_orderkey#X, o_custkey#X, o_comment#X] Condition : ((isnotnull(o_comment#X) AND NOT o_comment#X LIKE %special%requests%) AND isnotnull(o_custkey#X)) -(45) Project +(46) Project Output [2]: [o_orderkey#X, o_custkey#X] Input [3]: [o_orderkey#X, o_custkey#X, o_comment#X] -(46) Exchange +(47) Exchange Input [2]: [o_orderkey#X, o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(47) ShuffledHashJoin +(48) Sort +Input [2]: [o_orderkey#X, o_custkey#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(49) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join type: LeftOuter Join condition: None -(48) Project +(50) Project Output [2]: [c_custkey#X, o_orderkey#X] Input [3]: [c_custkey#X, o_orderkey#X, o_custkey#X] -(49) HashAggregate +(51) HashAggregate Input [2]: [c_custkey#X, o_orderkey#X] Keys [1]: [c_custkey#X] Functions [1]: [partial_count(o_orderkey#X)] Aggregate Attributes [1]: [count#X] Results [2]: [c_custkey#X, count#X] -(50) HashAggregate +(52) HashAggregate Input [2]: [c_custkey#X, count#X] Keys [1]: [c_custkey#X] Functions [1]: [count(o_orderkey#X)] Aggregate Attributes [1]: [count(o_orderkey#X)#X] Results [1]: [count(o_orderkey#X)#X AS c_count#X] -(51) HashAggregate +(53) HashAggregate Input [1]: [c_count#X] Keys [1]: [c_count#X] Functions [1]: [partial_count(1)] Aggregate Attributes [1]: [count#X] Results [2]: [c_count#X, count#X] -(52) Exchange +(54) Exchange Input [2]: [c_count#X, count#X] Arguments: hashpartitioning(c_count#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(53) HashAggregate +(55) HashAggregate Input [2]: [c_count#X, count#X] Keys [1]: [c_count#X] Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#X] Results [2]: [c_count#X, count(1)#X AS custdist#X] -(54) Exchange +(56) Exchange Input [2]: [c_count#X, custdist#X] Arguments: rangepartitioning(custdist#X DESC NULLS LAST, c_count#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(55) Sort +(57) Sort Input [2]: [c_count#X, custdist#X] Arguments: [custdist#X DESC NULLS LAST, c_count#X DESC NULLS LAST], true, 0 -(56) AdaptiveSparkPlan +(58) AdaptiveSparkPlan Output [2]: [c_count#X, custdist#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/14.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/14.txt index c6f425f00868..492d3f8b9d07 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/14.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/14.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (37) +AdaptiveSparkPlan (39) +- == Final Plan == VeloxColumnarToRowExec (25) +- ^ ProjectExecTransformer (23) @@ -22,17 +22,19 @@ AdaptiveSparkPlan (37) +- ^ FilterExecTransformer (11) +- ^ Scan parquet (10) +- == Initial Plan == - HashAggregate (36) - +- HashAggregate (35) - +- Project (34) - +- ShuffledHashJoin Inner BuildRight (33) - :- Exchange (29) - : +- Project (28) - : +- Filter (27) - : +- Scan parquet (26) - +- Exchange (32) - +- Filter (31) - +- Scan parquet (30) + HashAggregate (38) + +- HashAggregate (37) + +- Project (36) + +- SortMergeJoin Inner (35) + :- Sort (30) + : +- Exchange (29) + : +- Project (28) + : +- Filter (27) + : +- Scan parquet (26) + +- Sort (34) + +- Exchange (33) + +- Filter (32) + +- Scan parquet (31) (1) Scan parquet @@ -163,45 +165,53 @@ Input [4]: [l_partkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Input [3]: [l_partkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(30) Scan parquet +(30) Sort +Input [3]: [l_partkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(31) Scan parquet Output [2]: [p_partkey#X, p_type#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_partkey)] ReadSchema: struct -(31) Filter +(32) Filter Input [2]: [p_partkey#X, p_type#X] Condition : isnotnull(p_partkey#X) -(32) Exchange +(33) Exchange Input [2]: [p_partkey#X, p_type#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(33) ShuffledHashJoin +(34) Sort +Input [2]: [p_partkey#X, p_type#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(35) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join type: Inner Join condition: None -(34) Project +(36) Project Output [3]: [l_extendedprice#X, l_discount#X, p_type#X] Input [5]: [l_partkey#X, l_extendedprice#X, l_discount#X, p_partkey#X, p_type#X] -(35) HashAggregate +(37) HashAggregate Input [3]: [l_extendedprice#X, l_discount#X, p_type#X] Keys: [] Functions [2]: [partial_sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN (l_extendedprice#X * (1 - l_discount#X)) ELSE 0.0000 END), partial_sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] Results [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] -(36) HashAggregate +(38) HashAggregate Input [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] Keys: [] Functions [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN (l_extendedprice#X * (1 - l_discount#X)) ELSE 0.0000 END), sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [2]: [sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN (l_extendedprice#X * (1 - l_discount#X)) ELSE 0.0000 END)#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X] Results [1]: [((100.00 * sum(CASE WHEN StartsWith(p_type#X, PROMO) THEN (l_extendedprice#X * (1 - l_discount#X)) ELSE 0.0000 END)#X) / sum((l_extendedprice#X * (1 - l_discount#X)))#X) AS promo_revenue#X] -(37) AdaptiveSparkPlan +(39) AdaptiveSparkPlan Output [1]: [promo_revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/15.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/15.txt index e30eec3d854f..129e4ad927e9 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/15.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/15.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (45) +AdaptiveSparkPlan (47) +- == Final Plan == VeloxColumnarToRowExec (30) +- AQEShuffleRead (29) @@ -27,20 +27,22 @@ AdaptiveSparkPlan (45) +- ^ FilterExecTransformer (11) +- ^ Scan parquet (10) +- == Initial Plan == - Sort (44) - +- Exchange (43) - +- Project (42) - +- ShuffledHashJoin Inner BuildLeft (41) - :- Exchange (33) - : +- Filter (32) - : +- Scan parquet (31) - +- Filter (40) - +- HashAggregate (39) - +- Exchange (38) - +- HashAggregate (37) - +- Project (36) - +- Filter (35) - +- Scan parquet (34) + Sort (46) + +- Exchange (45) + +- Project (44) + +- SortMergeJoin Inner (43) + :- Sort (34) + : +- Exchange (33) + : +- Filter (32) + : +- Scan parquet (31) + +- Sort (42) + +- Filter (41) + +- HashAggregate (40) + +- Exchange (39) + +- HashAggregate (38) + +- Project (37) + +- Filter (36) + +- Scan parquet (35) (1) Scan parquet @@ -187,222 +189,230 @@ Condition : isnotnull(s_suppkey#X) Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(34) Scan parquet +(34) Sort +Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(35) Scan parquet Output [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1996-01-01), LessThan(l_shipdate,1996-04-01), IsNotNull(l_suppkey)] ReadSchema: struct -(35) Filter +(36) Filter Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : (((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1996-01-01)) AND (l_shipdate#X < 1996-04-01)) AND isnotnull(l_suppkey#X)) -(36) Project +(37) Project Output [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(37) HashAggregate +(38) HashAggregate Input [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] Keys [1]: [l_suppkey#X] Functions [1]: [partial_sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_suppkey#X, sum#X, isEmpty#X] -(38) Exchange +(39) Exchange Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(39) HashAggregate +(40) HashAggregate Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Keys [1]: [l_suppkey#X] Functions [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X] Results [2]: [l_suppkey#X AS supplier_no#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X AS total_revenue#X] -(40) Filter +(41) Filter Input [2]: [supplier_no#X, total_revenue#X] Condition : (isnotnull(total_revenue#X) AND (total_revenue#X = Subquery subquery#X, [id=#X])) -(41) ShuffledHashJoin +(42) Sort +Input [2]: [supplier_no#X, total_revenue#X] +Arguments: [supplier_no#X ASC NULLS FIRST], false, 0 + +(43) SortMergeJoin Left keys [1]: [s_suppkey#X] Right keys [1]: [supplier_no#X] Join type: Inner Join condition: None -(42) Project +(44) Project Output [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] Input [6]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, supplier_no#X, total_revenue#X] -(43) Exchange +(45) Exchange Input [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] Arguments: rangepartitioning(s_suppkey#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(44) Sort +(46) Sort Input [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] Arguments: [s_suppkey#X ASC NULLS FIRST], true, 0 -(45) AdaptiveSparkPlan +(47) AdaptiveSparkPlan Output [5]: [s_suppkey#X, s_name#X, s_address#X, s_phone#X, total_revenue#X] Arguments: isFinalPlan=true ===== Subqueries ===== Subquery:1 Hosting operator id = 22 Hosting Expression = Subquery subquery#X, [id=#X] -AdaptiveSparkPlan (71) +AdaptiveSparkPlan (73) +- == Final Plan == - VeloxColumnarToRowExec (62) - +- ^ RegularHashAggregateExecTransformer (60) - +- ^ RegularHashAggregateExecTransformer (59) - +- ^ ProjectExecTransformer (58) - +- ^ RegularHashAggregateExecTransformer (57) - +- ^ InputIteratorTransformer (56) - +- ShuffleQueryStage (54), Statistics(X) - +- ColumnarExchange (53) - +- VeloxAppendBatches (52) - +- ^ ProjectExecTransformer (50) - +- ^ FlushableHashAggregateExecTransformer (49) - +- ^ ProjectExecTransformer (48) - +- ^ FilterExecTransformer (47) - +- ^ Scan parquet (46) + VeloxColumnarToRowExec (64) + +- ^ RegularHashAggregateExecTransformer (62) + +- ^ RegularHashAggregateExecTransformer (61) + +- ^ ProjectExecTransformer (60) + +- ^ RegularHashAggregateExecTransformer (59) + +- ^ InputIteratorTransformer (58) + +- ShuffleQueryStage (56), Statistics(X) + +- ColumnarExchange (55) + +- VeloxAppendBatches (54) + +- ^ ProjectExecTransformer (52) + +- ^ FlushableHashAggregateExecTransformer (51) + +- ^ ProjectExecTransformer (50) + +- ^ FilterExecTransformer (49) + +- ^ Scan parquet (48) +- == Initial Plan == - HashAggregate (70) - +- HashAggregate (69) - +- HashAggregate (68) - +- Exchange (67) - +- HashAggregate (66) - +- Project (65) - +- Filter (64) - +- Scan parquet (63) + HashAggregate (72) + +- HashAggregate (71) + +- HashAggregate (70) + +- Exchange (69) + +- HashAggregate (68) + +- Project (67) + +- Filter (66) + +- Scan parquet (65) -(46) Scan parquet +(48) Scan parquet Output [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1996-01-01), LessThan(l_shipdate,1996-04-01)] ReadSchema: struct -(47) FilterExecTransformer +(49) FilterExecTransformer Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Arguments: ((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1996-01-01)) AND (l_shipdate#X < 1996-04-01)) -(48) ProjectExecTransformer +(50) ProjectExecTransformer Output [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, (l_extendedprice#X * (1 - l_discount#X)) AS _pre_X#X] Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(49) FlushableHashAggregateExecTransformer +(51) FlushableHashAggregateExecTransformer Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, _pre_X#X] Keys [1]: [l_suppkey#X] Functions [1]: [partial_sum(_pre_X#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_suppkey#X, sum#X, isEmpty#X] -(50) ProjectExecTransformer +(52) ProjectExecTransformer Output [4]: [hash(l_suppkey#X, 42) AS hash_partition_key#X, l_suppkey#X, sum#X, isEmpty#X] Input [3]: [l_suppkey#X, sum#X, isEmpty#X] -(51) WholeStageCodegenTransformer (X) +(53) WholeStageCodegenTransformer (X) Input [4]: [hash_partition_key#X, l_suppkey#X, sum#X, isEmpty#X] Arguments: false -(52) VeloxAppendBatches +(54) VeloxAppendBatches Input [4]: [hash_partition_key#X, l_suppkey#X, sum#X, isEmpty#X] Arguments: X -(53) ColumnarExchange +(55) ColumnarExchange Input [4]: [hash_partition_key#X, l_suppkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [l_suppkey#X, sum#X, isEmpty#X], [plan_id=X], [id=#X] -(54) ShuffleQueryStage +(56) ShuffleQueryStage Output [3]: [l_suppkey#X, sum#X, isEmpty#X] Arguments: X -(55) InputAdapter +(57) InputAdapter Input [3]: [l_suppkey#X, sum#X, isEmpty#X] -(56) InputIteratorTransformer +(58) InputIteratorTransformer Input [3]: [l_suppkey#X, sum#X, isEmpty#X] -(57) RegularHashAggregateExecTransformer +(59) RegularHashAggregateExecTransformer Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Keys [1]: [l_suppkey#X] Functions [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X] Results [2]: [l_suppkey#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X] -(58) ProjectExecTransformer +(60) ProjectExecTransformer Output [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X AS total_revenue#X] Input [2]: [l_suppkey#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X] -(59) RegularHashAggregateExecTransformer +(61) RegularHashAggregateExecTransformer Input [1]: [total_revenue#X] Keys: [] Functions [1]: [partial_max(total_revenue#X)] Aggregate Attributes [1]: [max#X] Results [1]: [max#X] -(60) RegularHashAggregateExecTransformer +(62) RegularHashAggregateExecTransformer Input [1]: [max#X] Keys: [] Functions [1]: [max(total_revenue#X)] Aggregate Attributes [1]: [max(total_revenue#X)#X] Results [1]: [max(total_revenue#X)#X AS max(total_revenue)#X] -(61) WholeStageCodegenTransformer (X) +(63) WholeStageCodegenTransformer (X) Input [1]: [max(total_revenue)#X] Arguments: false -(62) VeloxColumnarToRowExec +(64) VeloxColumnarToRowExec Input [1]: [max(total_revenue)#X] -(63) Scan parquet +(65) Scan parquet Output [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1996-01-01), LessThan(l_shipdate,1996-04-01)] ReadSchema: struct -(64) Filter +(66) Filter Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : ((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1996-01-01)) AND (l_shipdate#X < 1996-04-01)) -(65) Project +(67) Project Output [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(66) HashAggregate +(68) HashAggregate Input [3]: [l_suppkey#X, l_extendedprice#X, l_discount#X] Keys [1]: [l_suppkey#X] Functions [1]: [partial_sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_suppkey#X, sum#X, isEmpty#X] -(67) Exchange +(69) Exchange Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(68) HashAggregate +(70) HashAggregate Input [3]: [l_suppkey#X, sum#X, isEmpty#X] Keys [1]: [l_suppkey#X] Functions [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X] Results [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X AS total_revenue#X] -(69) HashAggregate +(71) HashAggregate Input [1]: [total_revenue#X] Keys: [] Functions [1]: [partial_max(total_revenue#X)] Aggregate Attributes [1]: [max#X] Results [1]: [max#X] -(70) HashAggregate +(72) HashAggregate Input [1]: [max#X] Keys: [] Functions [1]: [max(total_revenue#X)] Aggregate Attributes [1]: [max(total_revenue#X)#X] Results [1]: [max(total_revenue#X)#X AS max(total_revenue)#X] -(71) AdaptiveSparkPlan +(73) AdaptiveSparkPlan Output [1]: [max(total_revenue)#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/16.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/16.txt index eff7577281e6..45b6041f8b4b 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/16.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/16.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (69) +AdaptiveSparkPlan (71) +- == Final Plan == VeloxColumnarToRowExec (47) +- ^ SortExecTransformer (45) @@ -38,27 +38,29 @@ AdaptiveSparkPlan (69) +- ^ FilterExecTransformer (11) +- ^ Scan parquet (10) +- == Initial Plan == - Sort (68) - +- Exchange (67) - +- HashAggregate (66) - +- Exchange (65) - +- HashAggregate (64) - +- HashAggregate (63) - +- Exchange (62) - +- HashAggregate (61) - +- Project (60) - +- ShuffledHashJoin Inner BuildRight (59) - :- Exchange (55) - : +- BroadcastHashJoin LeftAnti BuildRight (54) - : :- Filter (49) - : : +- Scan parquet (48) - : +- BroadcastExchange (53) - : +- Project (52) - : +- Filter (51) - : +- Scan parquet (50) - +- Exchange (58) - +- Filter (57) - +- Scan parquet (56) + Sort (70) + +- Exchange (69) + +- HashAggregate (68) + +- Exchange (67) + +- HashAggregate (66) + +- HashAggregate (65) + +- Exchange (64) + +- HashAggregate (63) + +- Project (62) + +- SortMergeJoin Inner (61) + :- Sort (56) + : +- Exchange (55) + : +- BroadcastHashJoin LeftAnti BuildRight (54) + : :- Filter (49) + : : +- Scan parquet (48) + : +- BroadcastExchange (53) + : +- Project (52) + : +- Filter (51) + : +- Scan parquet (50) + +- Sort (60) + +- Exchange (59) + +- Filter (58) + +- Scan parquet (57) (1) Scan parquet @@ -298,75 +300,83 @@ Join condition: None Input [2]: [ps_partkey#X, ps_suppkey#X] Arguments: hashpartitioning(ps_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(56) Scan parquet +(56) Sort +Input [2]: [ps_partkey#X, ps_suppkey#X] +Arguments: [ps_partkey#X ASC NULLS FIRST], false, 0 + +(57) Scan parquet Output [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_brand), IsNotNull(p_type), Not(EqualTo(p_brand,Brand#X)), Not(StringStartsWith(p_type,MEDIUM POLISHED)), In(p_size, [14,19,23,3,36,45,49,9]), IsNotNull(p_partkey)] ReadSchema: struct -(57) Filter +(58) Filter Input [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] Condition : (((((isnotnull(p_brand#X) AND isnotnull(p_type#X)) AND NOT (p_brand#X = Brand#X)) AND NOT StartsWith(p_type#X, MEDIUM POLISHED)) AND p_size#X IN (49,14,23,45,19,3,36,9)) AND isnotnull(p_partkey#X)) -(58) Exchange +(59) Exchange Input [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(59) ShuffledHashJoin +(60) Sort +Input [4]: [p_partkey#X, p_brand#X, p_type#X, p_size#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(61) SortMergeJoin Left keys [1]: [ps_partkey#X] Right keys [1]: [p_partkey#X] Join type: Inner Join condition: None -(60) Project +(62) Project Output [4]: [ps_suppkey#X, p_brand#X, p_type#X, p_size#X] Input [6]: [ps_partkey#X, ps_suppkey#X, p_partkey#X, p_brand#X, p_type#X, p_size#X] -(61) HashAggregate +(63) HashAggregate Input [4]: [ps_suppkey#X, p_brand#X, p_type#X, p_size#X] Keys [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] Functions: [] Aggregate Attributes: [] Results [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] -(62) Exchange +(64) Exchange Input [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] Arguments: hashpartitioning(p_brand#X, p_type#X, p_size#X, ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(63) HashAggregate +(65) HashAggregate Input [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] Keys [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] Functions: [] Aggregate Attributes: [] Results [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] -(64) HashAggregate +(66) HashAggregate Input [4]: [p_brand#X, p_type#X, p_size#X, ps_suppkey#X] Keys [3]: [p_brand#X, p_type#X, p_size#X] Functions [1]: [partial_count(distinct ps_suppkey#X)] Aggregate Attributes [1]: [count(ps_suppkey#X)#X] Results [4]: [p_brand#X, p_type#X, p_size#X, count#X] -(65) Exchange +(67) Exchange Input [4]: [p_brand#X, p_type#X, p_size#X, count#X] Arguments: hashpartitioning(p_brand#X, p_type#X, p_size#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(66) HashAggregate +(68) HashAggregate Input [4]: [p_brand#X, p_type#X, p_size#X, count#X] Keys [3]: [p_brand#X, p_type#X, p_size#X] Functions [1]: [count(distinct ps_suppkey#X)] Aggregate Attributes [1]: [count(ps_suppkey#X)#X] Results [4]: [p_brand#X, p_type#X, p_size#X, count(ps_suppkey#X)#X AS supplier_cnt#X] -(67) Exchange +(69) Exchange Input [4]: [p_brand#X, p_type#X, p_size#X, supplier_cnt#X] Arguments: rangepartitioning(supplier_cnt#X DESC NULLS LAST, p_brand#X ASC NULLS FIRST, p_type#X ASC NULLS FIRST, p_size#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(68) Sort +(70) Sort Input [4]: [p_brand#X, p_type#X, p_size#X, supplier_cnt#X] Arguments: [supplier_cnt#X DESC NULLS LAST, p_brand#X ASC NULLS FIRST, p_type#X ASC NULLS FIRST, p_size#X ASC NULLS FIRST], true, 0 -(69) AdaptiveSparkPlan +(71) AdaptiveSparkPlan Output [4]: [p_brand#X, p_type#X, p_size#X, supplier_cnt#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/17.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/17.txt index 649bfcbe40e1..b46b3e3f2724 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/17.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/17.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (60) +AdaptiveSparkPlan (63) +- == Final Plan == VeloxColumnarToRowExec (40) +- ^ ProjectExecTransformer (38) @@ -35,25 +35,28 @@ AdaptiveSparkPlan (60) +- ^ FilterExecTransformer (22) +- ^ Scan parquet (21) +- == Initial Plan == - HashAggregate (59) - +- HashAggregate (58) - +- Project (57) - +- ShuffledHashJoin Inner BuildRight (56) - :- Project (49) - : +- ShuffledHashJoin Inner BuildRight (48) - : :- Exchange (43) - : : +- Filter (42) - : : +- Scan parquet (41) - : +- Exchange (47) - : +- Project (46) - : +- Filter (45) - : +- Scan parquet (44) - +- Filter (55) - +- HashAggregate (54) - +- Exchange (53) - +- HashAggregate (52) - +- Filter (51) - +- Scan parquet (50) + HashAggregate (62) + +- HashAggregate (61) + +- Project (60) + +- SortMergeJoin Inner (59) + :- Project (51) + : +- SortMergeJoin Inner (50) + : :- Sort (44) + : : +- Exchange (43) + : : +- Filter (42) + : : +- Scan parquet (41) + : +- Sort (49) + : +- Exchange (48) + : +- Project (47) + : +- Filter (46) + : +- Scan parquet (45) + +- Sort (58) + +- Filter (57) + +- HashAggregate (56) + +- Exchange (55) + +- HashAggregate (54) + +- Filter (53) + +- Scan parquet (52) (1) Scan parquet @@ -249,92 +252,104 @@ Condition : (isnotnull(l_partkey#X) AND isnotnull(l_quantity#X)) Input [3]: [l_partkey#X, l_quantity#X, l_extendedprice#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(44) Scan parquet +(44) Sort +Input [3]: [l_partkey#X, l_quantity#X, l_extendedprice#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(45) Scan parquet Output [3]: [p_partkey#X, p_brand#X, p_container#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_brand), IsNotNull(p_container), EqualTo(p_brand,Brand#X), EqualTo(p_container,MED BOX), IsNotNull(p_partkey)] ReadSchema: struct -(45) Filter +(46) Filter Input [3]: [p_partkey#X, p_brand#X, p_container#X] Condition : ((((isnotnull(p_brand#X) AND isnotnull(p_container#X)) AND (p_brand#X = Brand#X)) AND (p_container#X = MED BOX)) AND isnotnull(p_partkey#X)) -(46) Project +(47) Project Output [1]: [p_partkey#X] Input [3]: [p_partkey#X, p_brand#X, p_container#X] -(47) Exchange +(48) Exchange Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(48) ShuffledHashJoin +(49) Sort +Input [1]: [p_partkey#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(50) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join type: Inner Join condition: None -(49) Project +(51) Project Output [3]: [l_quantity#X, l_extendedprice#X, p_partkey#X] Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, p_partkey#X] -(50) Scan parquet +(52) Scan parquet Output [2]: [l_partkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_partkey)] ReadSchema: struct -(51) Filter +(53) Filter Input [2]: [l_partkey#X, l_quantity#X] Condition : isnotnull(l_partkey#X) -(52) HashAggregate +(54) HashAggregate Input [2]: [l_partkey#X, l_quantity#X] Keys [1]: [l_partkey#X] Functions [1]: [partial_avg(l_quantity#X)] Aggregate Attributes [2]: [sum#X, count#X] Results [3]: [l_partkey#X, sum#X, count#X] -(53) Exchange +(55) Exchange Input [3]: [l_partkey#X, sum#X, count#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(54) HashAggregate +(56) HashAggregate Input [3]: [l_partkey#X, sum#X, count#X] Keys [1]: [l_partkey#X] Functions [1]: [avg(l_quantity#X)] Aggregate Attributes [1]: [avg(l_quantity#X)#X] Results [2]: [(0.2 * avg(l_quantity#X)#X) AS (0.2 * avg(l_quantity))#X, l_partkey#X] -(55) Filter +(57) Filter Input [2]: [(0.2 * avg(l_quantity))#X, l_partkey#X] Condition : isnotnull((0.2 * avg(l_quantity))#X) -(56) ShuffledHashJoin +(58) Sort +Input [2]: [(0.2 * avg(l_quantity))#X, l_partkey#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(59) SortMergeJoin Left keys [1]: [p_partkey#X] Right keys [1]: [l_partkey#X] Join type: Inner Join condition: (cast(l_quantity#X as decimal(18,7)) < (0.2 * avg(l_quantity))#X) -(57) Project +(60) Project Output [1]: [l_extendedprice#X] Input [5]: [l_quantity#X, l_extendedprice#X, p_partkey#X, (0.2 * avg(l_quantity))#X, l_partkey#X] -(58) HashAggregate +(61) HashAggregate Input [1]: [l_extendedprice#X] Keys: [] Functions [1]: [partial_sum(l_extendedprice#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [2]: [sum#X, isEmpty#X] -(59) HashAggregate +(62) HashAggregate Input [2]: [sum#X, isEmpty#X] Keys: [] Functions [1]: [sum(l_extendedprice#X)] Aggregate Attributes [1]: [sum(l_extendedprice#X)#X] Results [1]: [(sum(l_extendedprice#X)#X / 7.0) AS avg_yearly#X] -(60) AdaptiveSparkPlan +(63) AdaptiveSparkPlan Output [1]: [avg_yearly#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/18.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/18.txt index c3075e511782..febb48962446 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/18.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/18.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (103) +AdaptiveSparkPlan (110) +- == Final Plan == VeloxColumnarToRowExec (70) +- TakeOrderedAndProjectExecTransformer (69) @@ -58,38 +58,45 @@ AdaptiveSparkPlan (103) +- ShuffleQueryStage (57), Statistics(X) +- ReusedExchange (56) +- == Initial Plan == - TakeOrderedAndProject (102) - +- HashAggregate (101) - +- HashAggregate (100) - +- Project (99) - +- ShuffledHashJoin Inner BuildRight (98) - :- Exchange (87) - : +- Project (86) - : +- ShuffledHashJoin Inner BuildLeft (85) - : :- Exchange (73) - : : +- Filter (72) - : : +- Scan parquet (71) - : +- Exchange (84) - : +- ShuffledHashJoin LeftSemi BuildRight (83) - : :- Exchange (76) - : : +- Filter (75) - : : +- Scan parquet (74) - : +- Project (82) - : +- Filter (81) - : +- HashAggregate (80) - : +- Exchange (79) - : +- HashAggregate (78) - : +- Scan parquet (77) - +- ShuffledHashJoin LeftSemi BuildRight (97) - :- Exchange (90) - : +- Filter (89) - : +- Scan parquet (88) - +- Project (96) - +- Filter (95) - +- HashAggregate (94) - +- Exchange (93) - +- HashAggregate (92) - +- Scan parquet (91) + TakeOrderedAndProject (109) + +- HashAggregate (108) + +- HashAggregate (107) + +- Project (106) + +- SortMergeJoin Inner (105) + :- Sort (92) + : +- Exchange (91) + : +- Project (90) + : +- SortMergeJoin Inner (89) + : :- Sort (74) + : : +- Exchange (73) + : : +- Filter (72) + : : +- Scan parquet (71) + : +- Sort (88) + : +- Exchange (87) + : +- SortMergeJoin LeftSemi (86) + : :- Sort (78) + : : +- Exchange (77) + : : +- Filter (76) + : : +- Scan parquet (75) + : +- Sort (85) + : +- Project (84) + : +- Filter (83) + : +- HashAggregate (82) + : +- Exchange (81) + : +- HashAggregate (80) + : +- Scan parquet (79) + +- SortMergeJoin LeftSemi (104) + :- Sort (96) + : +- Exchange (95) + : +- Filter (94) + : +- Scan parquet (93) + +- Sort (103) + +- Project (102) + +- Filter (101) + +- HashAggregate (100) + +- Exchange (99) + +- HashAggregate (98) + +- Scan parquet (97) (1) Scan parquet @@ -405,158 +412,186 @@ Condition : isnotnull(c_custkey#X) Input [2]: [c_custkey#X, c_name#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(74) Scan parquet +(74) Sort +Input [2]: [c_custkey#X, c_name#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(75) Scan parquet Output [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_custkey), IsNotNull(o_orderkey)] ReadSchema: struct -(75) Filter +(76) Filter Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Condition : (isnotnull(o_custkey#X) AND isnotnull(o_orderkey#X)) -(76) Exchange +(77) Exchange Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(77) Scan parquet +(78) Sort +Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(79) Scan parquet Output [2]: [l_orderkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(78) HashAggregate +(80) HashAggregate Input [2]: [l_orderkey#X, l_quantity#X] Keys [1]: [l_orderkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_orderkey#X, sum#X, isEmpty#X] -(79) Exchange +(81) Exchange Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(80) HashAggregate +(82) HashAggregate Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Keys [1]: [l_orderkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [2]: [l_orderkey#X, sum(l_quantity#X)#X AS sum(l_quantity#X)#X] -(81) Filter +(83) Filter Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] Condition : (isnotnull(sum(l_quantity#X)#X) AND (sum(l_quantity#X)#X > 300.00)) -(82) Project +(84) Project Output [1]: [l_orderkey#X] Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] -(83) ShuffledHashJoin +(85) Sort +Input [1]: [l_orderkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(86) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join type: LeftSemi Join condition: None -(84) Exchange +(87) Exchange Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(85) ShuffledHashJoin +(88) Sort +Input [4]: [o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(89) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join type: Inner Join condition: None -(86) Project +(90) Project Output [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] Input [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_custkey#X, o_totalprice#X, o_orderdate#X] -(87) Exchange +(91) Exchange Input [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(88) Scan parquet +(92) Sort +Input [5]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(93) Scan parquet Output [2]: [l_orderkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_orderkey)] ReadSchema: struct -(89) Filter +(94) Filter Input [2]: [l_orderkey#X, l_quantity#X] Condition : isnotnull(l_orderkey#X) -(90) Exchange +(95) Exchange Input [2]: [l_orderkey#X, l_quantity#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(91) Scan parquet +(96) Sort +Input [2]: [l_orderkey#X, l_quantity#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(97) Scan parquet Output [2]: [l_orderkey#X, l_quantity#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(92) HashAggregate +(98) HashAggregate Input [2]: [l_orderkey#X, l_quantity#X] Keys [1]: [l_orderkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [l_orderkey#X, sum#X, isEmpty#X] -(93) Exchange +(99) Exchange Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(94) HashAggregate +(100) HashAggregate Input [3]: [l_orderkey#X, sum#X, isEmpty#X] Keys [1]: [l_orderkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [2]: [l_orderkey#X, sum(l_quantity#X)#X AS sum(l_quantity#X)#X] -(95) Filter +(101) Filter Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] Condition : (isnotnull(sum(l_quantity#X)#X) AND (sum(l_quantity#X)#X > 300.00)) -(96) Project +(102) Project Output [1]: [l_orderkey#X] Input [2]: [l_orderkey#X, sum(l_quantity#X)#X] -(97) ShuffledHashJoin +(103) Sort +Input [1]: [l_orderkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(104) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [l_orderkey#X] Join type: LeftSemi Join condition: None -(98) ShuffledHashJoin +(105) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join type: Inner Join condition: None -(99) Project +(106) Project Output [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_quantity#X] Input [7]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_orderkey#X, l_quantity#X] -(100) HashAggregate +(107) HashAggregate Input [6]: [c_custkey#X, c_name#X, o_orderkey#X, o_totalprice#X, o_orderdate#X, l_quantity#X] Keys [5]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [7]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] -(101) HashAggregate +(108) HashAggregate Input [7]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum#X, isEmpty#X] Keys [5]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity#X)#X AS sum(l_quantity)#X] -(102) TakeOrderedAndProject +(109) TakeOrderedAndProject Input [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] Arguments: X, [o_totalprice#X DESC NULLS LAST, o_orderdate#X ASC NULLS FIRST], [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] -(103) AdaptiveSparkPlan +(110) AdaptiveSparkPlan Output [6]: [c_name#X, c_custkey#X, o_orderkey#X, o_orderdate#X, o_totalprice#X, sum(l_quantity)#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/19.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/19.txt index baf4b2a51607..fa78645313e4 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/19.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/19.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (36) +AdaptiveSparkPlan (38) +- == Final Plan == VeloxColumnarToRowExec (24) +- ^ RegularHashAggregateExecTransformer (22) @@ -21,17 +21,19 @@ AdaptiveSparkPlan (36) +- ^ FilterExecTransformer (11) +- ^ Scan parquet (10) +- == Initial Plan == - HashAggregate (35) - +- HashAggregate (34) - +- Project (33) - +- ShuffledHashJoin Inner BuildRight (32) - :- Exchange (28) - : +- Project (27) - : +- Filter (26) - : +- Scan parquet (25) - +- Exchange (31) - +- Filter (30) - +- Scan parquet (29) + HashAggregate (37) + +- HashAggregate (36) + +- Project (35) + +- SortMergeJoin Inner (34) + :- Sort (29) + : +- Exchange (28) + : +- Project (27) + : +- Filter (26) + : +- Scan parquet (25) + +- Sort (33) + +- Exchange (32) + +- Filter (31) + +- Scan parquet (30) (1) Scan parquet @@ -158,45 +160,53 @@ Input [6]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, l_shipin Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(29) Scan parquet +(29) Sort +Input [4]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(30) Scan parquet Output [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_size), GreaterThanOrEqual(p_size,1), IsNotNull(p_partkey), Or(Or(And(And(EqualTo(p_brand,Brand#X),In(p_container, [SM BOX,SM CASE,SM PACK,SM PKG])),LessThanOrEqual(p_size,5)),And(And(EqualTo(p_brand,Brand#X),In(p_container, [MED BAG,MED BOX,MED PACK,MED PKG])),LessThanOrEqual(p_size,10))),And(And(EqualTo(p_brand,Brand#X),In(p_container, [LG BOX,LG CASE,LG PACK,LG PKG])),LessThanOrEqual(p_size,15)))] ReadSchema: struct -(30) Filter +(31) Filter Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Condition : (((isnotnull(p_size#X) AND (p_size#X >= 1)) AND isnotnull(p_partkey#X)) AND (((((p_brand#X = Brand#X) AND p_container#X IN (SM CASE,SM BOX,SM PACK,SM PKG)) AND (p_size#X <= 5)) OR (((p_brand#X = Brand#X) AND p_container#X IN (MED BAG,MED BOX,MED PKG,MED PACK)) AND (p_size#X <= 10))) OR (((p_brand#X = Brand#X) AND p_container#X IN (LG CASE,LG BOX,LG PACK,LG PKG)) AND (p_size#X <= 15)))) -(31) Exchange +(32) Exchange Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(32) ShuffledHashJoin +(33) Sort +Input [4]: [p_partkey#X, p_brand#X, p_size#X, p_container#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(34) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join type: Inner Join condition: (((((((p_brand#X = Brand#X) AND p_container#X IN (SM CASE,SM BOX,SM PACK,SM PKG)) AND (l_quantity#X >= 1.00)) AND (l_quantity#X <= 11.00)) AND (p_size#X <= 5)) OR (((((p_brand#X = Brand#X) AND p_container#X IN (MED BAG,MED BOX,MED PKG,MED PACK)) AND (l_quantity#X >= 10.00)) AND (l_quantity#X <= 20.00)) AND (p_size#X <= 10))) OR (((((p_brand#X = Brand#X) AND p_container#X IN (LG CASE,LG BOX,LG PACK,LG PKG)) AND (l_quantity#X >= 20.00)) AND (l_quantity#X <= 30.00)) AND (p_size#X <= 15))) -(33) Project +(35) Project Output [2]: [l_extendedprice#X, l_discount#X] Input [8]: [l_partkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, p_partkey#X, p_brand#X, p_size#X, p_container#X] -(34) HashAggregate +(36) HashAggregate Input [2]: [l_extendedprice#X, l_discount#X] Keys: [] Functions [1]: [partial_sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [2]: [sum#X, isEmpty#X] -(35) HashAggregate +(37) HashAggregate Input [2]: [sum#X, isEmpty#X] Keys: [] Functions [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X] Results [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X AS revenue#X] -(36) AdaptiveSparkPlan +(38) AdaptiveSparkPlan Output [1]: [revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/20.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/20.txt index 7ddecfe855eb..bb9987fc32c1 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/20.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/20.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (133) +AdaptiveSparkPlan (143) +- == Final Plan == VeloxColumnarToRowExec (93) +- AQEShuffleRead (92) @@ -75,45 +75,55 @@ AdaptiveSparkPlan (133) +- ^ FilterExecTransformer (78) +- ^ Scan parquet (77) +- == Initial Plan == - Sort (132) - +- Exchange (131) - +- Project (130) - +- ShuffledHashJoin Inner BuildRight (129) - :- Exchange (124) - : +- Project (123) - : +- ShuffledHashJoin LeftSemi BuildRight (122) - : :- Exchange (96) - : : +- Filter (95) - : : +- Scan parquet (94) - : +- Exchange (121) - : +- Project (120) - : +- ShuffledHashJoin Inner BuildLeft (119) - : :- Exchange (105) - : : +- ShuffledHashJoin LeftSemi BuildRight (104) - : : :- Exchange (99) - : : : +- Filter (98) - : : : +- Scan parquet (97) - : : +- Exchange (103) - : : +- Project (102) - : : +- Filter (101) - : : +- Scan parquet (100) - : +- Exchange (118) - : +- Filter (117) - : +- HashAggregate (116) - : +- HashAggregate (115) - : +- ShuffledHashJoin LeftSemi BuildRight (114) - : :- Exchange (109) - : : +- Project (108) - : : +- Filter (107) - : : +- Scan parquet (106) - : +- Exchange (113) - : +- Project (112) - : +- Filter (111) - : +- Scan parquet (110) - +- Exchange (128) - +- Project (127) - +- Filter (126) - +- Scan parquet (125) + Sort (142) + +- Exchange (141) + +- Project (140) + +- SortMergeJoin Inner (139) + :- Sort (133) + : +- Exchange (132) + : +- Project (131) + : +- SortMergeJoin LeftSemi (130) + : :- Sort (97) + : : +- Exchange (96) + : : +- Filter (95) + : : +- Scan parquet (94) + : +- Sort (129) + : +- Exchange (128) + : +- Project (127) + : +- SortMergeJoin Inner (126) + : :- Sort (109) + : : +- Exchange (108) + : : +- SortMergeJoin LeftSemi (107) + : : :- Sort (101) + : : : +- Exchange (100) + : : : +- Filter (99) + : : : +- Scan parquet (98) + : : +- Sort (106) + : : +- Exchange (105) + : : +- Project (104) + : : +- Filter (103) + : : +- Scan parquet (102) + : +- Sort (125) + : +- Exchange (124) + : +- Filter (123) + : +- HashAggregate (122) + : +- HashAggregate (121) + : +- SortMergeJoin LeftSemi (120) + : :- Sort (114) + : : +- Exchange (113) + : : +- Project (112) + : : +- Filter (111) + : : +- Scan parquet (110) + : +- Sort (119) + : +- Exchange (118) + : +- Project (117) + : +- Filter (116) + : +- Scan parquet (115) + +- Sort (138) + +- Exchange (137) + +- Project (136) + +- Filter (135) + +- Scan parquet (134) (1) Scan parquet @@ -512,181 +522,221 @@ Condition : isnotnull(s_nationkey#X) Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(97) Scan parquet +(97) Sort +Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(98) Scan parquet Output [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(ps_availqty), IsNotNull(ps_partkey), IsNotNull(ps_suppkey)] ReadSchema: struct -(98) Filter +(99) Filter Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Condition : ((isnotnull(ps_availqty#X) AND isnotnull(ps_partkey#X)) AND isnotnull(ps_suppkey#X)) -(99) Exchange +(100) Exchange Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Arguments: hashpartitioning(ps_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(100) Scan parquet +(101) Sort +Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] +Arguments: [ps_partkey#X ASC NULLS FIRST], false, 0 + +(102) Scan parquet Output [2]: [p_partkey#X, p_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_name), StringStartsWith(p_name,forest)] ReadSchema: struct -(101) Filter +(103) Filter Input [2]: [p_partkey#X, p_name#X] Condition : (isnotnull(p_name#X) AND StartsWith(p_name#X, forest)) -(102) Project +(104) Project Output [1]: [p_partkey#X] Input [2]: [p_partkey#X, p_name#X] -(103) Exchange +(105) Exchange Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(104) ShuffledHashJoin +(106) Sort +Input [1]: [p_partkey#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(107) SortMergeJoin Left keys [1]: [ps_partkey#X] Right keys [1]: [p_partkey#X] Join type: LeftSemi Join condition: None -(105) Exchange +(108) Exchange Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] Arguments: hashpartitioning(ps_partkey#X, ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(106) Scan parquet +(109) Sort +Input [3]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X] +Arguments: [ps_partkey#X ASC NULLS FIRST, ps_suppkey#X ASC NULLS FIRST], false, 0 + +(110) Scan parquet Output [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1994-01-01), LessThan(l_shipdate,1995-01-01), IsNotNull(l_partkey), IsNotNull(l_suppkey)] ReadSchema: struct -(107) Filter +(111) Filter Input [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] Condition : ((((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1994-01-01)) AND (l_shipdate#X < 1995-01-01)) AND isnotnull(l_partkey#X)) AND isnotnull(l_suppkey#X)) -(108) Project +(112) Project Output [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Input [4]: [l_partkey#X, l_suppkey#X, l_quantity#X, l_shipdate#X] -(109) Exchange +(113) Exchange Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(110) Scan parquet +(114) Sort +Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(115) Scan parquet Output [2]: [p_partkey#X, p_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(p_name), StringStartsWith(p_name,forest)] ReadSchema: struct -(111) Filter +(116) Filter Input [2]: [p_partkey#X, p_name#X] Condition : (isnotnull(p_name#X) AND StartsWith(p_name#X, forest)) -(112) Project +(117) Project Output [1]: [p_partkey#X] Input [2]: [p_partkey#X, p_name#X] -(113) Exchange +(118) Exchange Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(114) ShuffledHashJoin +(119) Sort +Input [1]: [p_partkey#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(120) SortMergeJoin Left keys [1]: [l_partkey#X] Right keys [1]: [p_partkey#X] Join type: LeftSemi Join condition: None -(115) HashAggregate +(121) HashAggregate Input [3]: [l_partkey#X, l_suppkey#X, l_quantity#X] Keys [2]: [l_partkey#X, l_suppkey#X] Functions [1]: [partial_sum(l_quantity#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] -(116) HashAggregate +(122) HashAggregate Input [4]: [l_partkey#X, l_suppkey#X, sum#X, isEmpty#X] Keys [2]: [l_partkey#X, l_suppkey#X] Functions [1]: [sum(l_quantity#X)] Aggregate Attributes [1]: [sum(l_quantity#X)#X] Results [3]: [(0.5 * sum(l_quantity#X)#X) AS (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(117) Filter +(123) Filter Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Condition : isnotnull((0.5 * sum(l_quantity))#X) -(118) Exchange +(124) Exchange Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] Arguments: hashpartitioning(l_partkey#X, l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(119) ShuffledHashJoin +(125) Sort +Input [3]: [(0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] +Arguments: [l_partkey#X ASC NULLS FIRST, l_suppkey#X ASC NULLS FIRST], false, 0 + +(126) SortMergeJoin Left keys [2]: [ps_partkey#X, ps_suppkey#X] Right keys [2]: [l_partkey#X, l_suppkey#X] Join type: Inner Join condition: (cast(ps_availqty#X as decimal(24,3)) > (0.5 * sum(l_quantity))#X) -(120) Project +(127) Project Output [1]: [ps_suppkey#X] Input [6]: [ps_partkey#X, ps_suppkey#X, ps_availqty#X, (0.5 * sum(l_quantity))#X, l_partkey#X, l_suppkey#X] -(121) Exchange +(128) Exchange Input [1]: [ps_suppkey#X] Arguments: hashpartitioning(ps_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(122) ShuffledHashJoin +(129) Sort +Input [1]: [ps_suppkey#X] +Arguments: [ps_suppkey#X ASC NULLS FIRST], false, 0 + +(130) SortMergeJoin Left keys [1]: [s_suppkey#X] Right keys [1]: [ps_suppkey#X] Join type: LeftSemi Join condition: None -(123) Project +(131) Project Output [3]: [s_name#X, s_address#X, s_nationkey#X] Input [4]: [s_suppkey#X, s_name#X, s_address#X, s_nationkey#X] -(124) Exchange +(132) Exchange Input [3]: [s_name#X, s_address#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(125) Scan parquet +(133) Sort +Input [3]: [s_name#X, s_address#X, s_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(134) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,CANADA), IsNotNull(n_nationkey)] ReadSchema: struct -(126) Filter +(135) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = CANADA)) AND isnotnull(n_nationkey#X)) -(127) Project +(136) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(128) Exchange +(137) Exchange Input [1]: [n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(129) ShuffledHashJoin +(138) Sort +Input [1]: [n_nationkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(139) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join type: Inner Join condition: None -(130) Project +(140) Project Output [2]: [s_name#X, s_address#X] Input [4]: [s_name#X, s_address#X, s_nationkey#X, n_nationkey#X] -(131) Exchange +(141) Exchange Input [2]: [s_name#X, s_address#X] Arguments: rangepartitioning(s_name#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(132) Sort +(142) Sort Input [2]: [s_name#X, s_address#X] Arguments: [s_name#X ASC NULLS FIRST], true, 0 -(133) AdaptiveSparkPlan +(143) AdaptiveSparkPlan Output [2]: [s_name#X, s_address#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/21.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/21.txt index 7b8c173fc086..5e8c9ad9f92a 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/21.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/21.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (128) +AdaptiveSparkPlan (137) +- == Final Plan == VeloxColumnarToRowExec (91) +- ^ RegularHashAggregateExecTransformer (89) @@ -72,42 +72,51 @@ AdaptiveSparkPlan (128) +- ^ FilterExecTransformer (71) +- ^ Scan parquet (70) +- == Initial Plan == - TakeOrderedAndProject (127) - +- HashAggregate (126) - +- Exchange (125) - +- HashAggregate (124) - +- Project (123) - +- ShuffledHashJoin Inner BuildRight (122) - :- Exchange (117) - : +- Project (116) - : +- ShuffledHashJoin Inner BuildRight (115) - : :- Exchange (110) - : : +- Project (109) - : : +- ShuffledHashJoin Inner BuildLeft (108) - : : :- Exchange (94) - : : : +- Filter (93) - : : : +- Scan parquet (92) - : : +- Exchange (107) - : : +- ShuffledHashJoin LeftAnti BuildRight (106) - : : :- ShuffledHashJoin LeftSemi BuildRight (101) - : : : :- Exchange (98) - : : : : +- Project (97) - : : : : +- Filter (96) - : : : : +- Scan parquet (95) - : : : +- Exchange (100) - : : : +- Scan parquet (99) - : : +- Exchange (105) - : : +- Project (104) - : : +- Filter (103) - : : +- Scan parquet (102) - : +- Exchange (114) - : +- Project (113) - : +- Filter (112) - : +- Scan parquet (111) - +- Exchange (121) - +- Project (120) - +- Filter (119) - +- Scan parquet (118) + TakeOrderedAndProject (136) + +- HashAggregate (135) + +- Exchange (134) + +- HashAggregate (133) + +- Project (132) + +- SortMergeJoin Inner (131) + :- Sort (125) + : +- Exchange (124) + : +- Project (123) + : +- SortMergeJoin Inner (122) + : :- Sort (116) + : : +- Exchange (115) + : : +- Project (114) + : : +- SortMergeJoin Inner (113) + : : :- Sort (95) + : : : +- Exchange (94) + : : : +- Filter (93) + : : : +- Scan parquet (92) + : : +- Sort (112) + : : +- Exchange (111) + : : +- SortMergeJoin LeftAnti (110) + : : :- SortMergeJoin LeftSemi (104) + : : : :- Sort (100) + : : : : +- Exchange (99) + : : : : +- Project (98) + : : : : +- Filter (97) + : : : : +- Scan parquet (96) + : : : +- Sort (103) + : : : +- Exchange (102) + : : : +- Scan parquet (101) + : : +- Sort (109) + : : +- Exchange (108) + : : +- Project (107) + : : +- Filter (106) + : : +- Scan parquet (105) + : +- Sort (121) + : +- Exchange (120) + : +- Project (119) + : +- Filter (118) + : +- Scan parquet (117) + +- Sort (130) + +- Exchange (129) + +- Project (128) + +- Filter (127) + +- Scan parquet (126) (1) Scan parquet @@ -501,168 +510,204 @@ Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) Input [3]: [s_suppkey#X, s_name#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(95) Scan parquet +(95) Sort +Input [3]: [s_suppkey#X, s_name#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(96) Scan parquet Output [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_receiptdate), IsNotNull(l_commitdate), IsNotNull(l_suppkey), IsNotNull(l_orderkey)] ReadSchema: struct -(96) Filter +(97) Filter Input [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] Condition : ((((isnotnull(l_receiptdate#X) AND isnotnull(l_commitdate#X)) AND (l_receiptdate#X > l_commitdate#X)) AND isnotnull(l_suppkey#X)) AND isnotnull(l_orderkey#X)) -(97) Project +(98) Project Output [2]: [l_orderkey#X, l_suppkey#X] Input [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] -(98) Exchange +(99) Exchange Input [2]: [l_orderkey#X, l_suppkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(99) Scan parquet +(100) Sort +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(101) Scan parquet Output [2]: [l_orderkey#X, l_suppkey#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(100) Exchange +(102) Exchange Input [2]: [l_orderkey#X, l_suppkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(101) ShuffledHashJoin +(103) Sort +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(104) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [l_orderkey#X] Join type: LeftSemi Join condition: NOT (l_suppkey#X = l_suppkey#X) -(102) Scan parquet +(105) Scan parquet Output [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_receiptdate), IsNotNull(l_commitdate)] ReadSchema: struct -(103) Filter +(106) Filter Input [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] Condition : ((isnotnull(l_receiptdate#X) AND isnotnull(l_commitdate#X)) AND (l_receiptdate#X > l_commitdate#X)) -(104) Project +(107) Project Output [2]: [l_orderkey#X, l_suppkey#X] Input [4]: [l_orderkey#X, l_suppkey#X, l_commitdate#X, l_receiptdate#X] -(105) Exchange +(108) Exchange Input [2]: [l_orderkey#X, l_suppkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(106) ShuffledHashJoin +(109) Sort +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(110) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [l_orderkey#X] Join type: LeftAnti Join condition: NOT (l_suppkey#X = l_suppkey#X) -(107) Exchange +(111) Exchange Input [2]: [l_orderkey#X, l_suppkey#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(108) ShuffledHashJoin +(112) Sort +Input [2]: [l_orderkey#X, l_suppkey#X] +Arguments: [l_suppkey#X ASC NULLS FIRST], false, 0 + +(113) SortMergeJoin Left keys [1]: [s_suppkey#X] Right keys [1]: [l_suppkey#X] Join type: Inner Join condition: None -(109) Project +(114) Project Output [3]: [s_name#X, s_nationkey#X, l_orderkey#X] Input [5]: [s_suppkey#X, s_name#X, s_nationkey#X, l_orderkey#X, l_suppkey#X] -(110) Exchange +(115) Exchange Input [3]: [s_name#X, s_nationkey#X, l_orderkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(111) Scan parquet +(116) Sort +Input [3]: [s_name#X, s_nationkey#X, l_orderkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(117) Scan parquet Output [2]: [o_orderkey#X, o_orderstatus#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderstatus), EqualTo(o_orderstatus,F), IsNotNull(o_orderkey)] ReadSchema: struct -(112) Filter +(118) Filter Input [2]: [o_orderkey#X, o_orderstatus#X] Condition : ((isnotnull(o_orderstatus#X) AND (o_orderstatus#X = F)) AND isnotnull(o_orderkey#X)) -(113) Project +(119) Project Output [1]: [o_orderkey#X] Input [2]: [o_orderkey#X, o_orderstatus#X] -(114) Exchange +(120) Exchange Input [1]: [o_orderkey#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(115) ShuffledHashJoin +(121) Sort +Input [1]: [o_orderkey#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(122) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [o_orderkey#X] Join type: Inner Join condition: None -(116) Project +(123) Project Output [2]: [s_name#X, s_nationkey#X] Input [4]: [s_name#X, s_nationkey#X, l_orderkey#X, o_orderkey#X] -(117) Exchange +(124) Exchange Input [2]: [s_name#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(118) Scan parquet +(125) Sort +Input [2]: [s_name#X, s_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(126) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,SAUDI ARABIA), IsNotNull(n_nationkey)] ReadSchema: struct -(119) Filter +(127) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : ((isnotnull(n_name#X) AND (n_name#X = SAUDI ARABIA)) AND isnotnull(n_nationkey#X)) -(120) Project +(128) Project Output [1]: [n_nationkey#X] Input [2]: [n_nationkey#X, n_name#X] -(121) Exchange +(129) Exchange Input [1]: [n_nationkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(122) ShuffledHashJoin +(130) Sort +Input [1]: [n_nationkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(131) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join type: Inner Join condition: None -(123) Project +(132) Project Output [1]: [s_name#X] Input [3]: [s_name#X, s_nationkey#X, n_nationkey#X] -(124) HashAggregate +(133) HashAggregate Input [1]: [s_name#X] Keys [1]: [s_name#X] Functions [1]: [partial_count(1)] Aggregate Attributes [1]: [count#X] Results [2]: [s_name#X, count#X] -(125) Exchange +(134) Exchange Input [2]: [s_name#X, count#X] Arguments: hashpartitioning(s_name#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(126) HashAggregate +(135) HashAggregate Input [2]: [s_name#X, count#X] Keys [1]: [s_name#X] Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#X] Results [2]: [s_name#X, count(1)#X AS numwait#X] -(127) TakeOrderedAndProject +(136) TakeOrderedAndProject Input [2]: [s_name#X, numwait#X] Arguments: X, [numwait#X DESC NULLS LAST, s_name#X ASC NULLS FIRST], [s_name#X, numwait#X] -(128) AdaptiveSparkPlan +(137) AdaptiveSparkPlan Output [2]: [s_name#X, numwait#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/22.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/22.txt index d6ec93a97fc6..50f1c1bdef30 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/22.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/22.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (50) +AdaptiveSparkPlan (52) +- == Final Plan == VeloxColumnarToRowExec (37) +- ^ SortExecTransformer (35) @@ -30,18 +30,20 @@ AdaptiveSparkPlan (50) +- ^ ProjectExecTransformer (11) +- ^ Scan parquet (10) +- == Initial Plan == - Sort (49) - +- Exchange (48) - +- HashAggregate (47) - +- Exchange (46) - +- HashAggregate (45) - +- Project (44) - +- ShuffledHashJoin LeftAnti BuildRight (43) - :- Exchange (40) - : +- Filter (39) - : +- Scan parquet (38) - +- Exchange (42) - +- Scan parquet (41) + Sort (51) + +- Exchange (50) + +- HashAggregate (49) + +- Exchange (48) + +- HashAggregate (47) + +- Project (46) + +- SortMergeJoin LeftAnti (45) + :- Sort (41) + : +- Exchange (40) + : +- Filter (39) + : +- Scan parquet (38) + +- Sort (44) + +- Exchange (43) + +- Scan parquet (42) (1) Scan parquet @@ -211,192 +213,200 @@ Condition : ((isnotnull(c_acctbal#X) AND substring(c_phone#X, 1, 2) IN (13,31,23 Input [3]: [c_custkey#X, c_phone#X, c_acctbal#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(41) Scan parquet +(41) Sort +Input [3]: [c_custkey#X, c_phone#X, c_acctbal#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(42) Scan parquet Output [1]: [o_custkey#X] Batched: true Location: InMemoryFileIndex [*] ReadSchema: struct -(42) Exchange +(43) Exchange Input [1]: [o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(43) ShuffledHashJoin +(44) Sort +Input [1]: [o_custkey#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(45) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join type: LeftAnti Join condition: None -(44) Project +(46) Project Output [2]: [substring(c_phone#X, 1, 2) AS cntrycode#X, c_acctbal#X] Input [3]: [c_custkey#X, c_phone#X, c_acctbal#X] -(45) HashAggregate +(47) HashAggregate Input [2]: [cntrycode#X, c_acctbal#X] Keys [1]: [cntrycode#X] Functions [2]: [partial_count(1), partial_sum(c_acctbal#X)] Aggregate Attributes [3]: [count#X, sum#X, isEmpty#X] Results [4]: [cntrycode#X, count#X, sum#X, isEmpty#X] -(46) Exchange +(48) Exchange Input [4]: [cntrycode#X, count#X, sum#X, isEmpty#X] Arguments: hashpartitioning(cntrycode#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(47) HashAggregate +(49) HashAggregate Input [4]: [cntrycode#X, count#X, sum#X, isEmpty#X] Keys [1]: [cntrycode#X] Functions [2]: [count(1), sum(c_acctbal#X)] Aggregate Attributes [2]: [count(1)#X, sum(c_acctbal#X)#X] Results [3]: [cntrycode#X, count(1)#X AS numcust#X, sum(c_acctbal#X)#X AS totacctbal#X] -(48) Exchange +(50) Exchange Input [3]: [cntrycode#X, numcust#X, totacctbal#X] Arguments: rangepartitioning(cntrycode#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(49) Sort +(51) Sort Input [3]: [cntrycode#X, numcust#X, totacctbal#X] Arguments: [cntrycode#X ASC NULLS FIRST], true, 0 -(50) AdaptiveSparkPlan +(52) AdaptiveSparkPlan Output [3]: [cntrycode#X, numcust#X, totacctbal#X] Arguments: isFinalPlan=true ===== Subqueries ===== Subquery:1 Hosting operator id = 2 Hosting Expression = Subquery subquery#X, [id=#X] -AdaptiveSparkPlan (70) +AdaptiveSparkPlan (72) +- == Final Plan == - VeloxColumnarToRowExec (63) - +- ^ RegularHashAggregateExecTransformer (61) - +- ^ InputIteratorTransformer (60) - +- ShuffleQueryStage (58), Statistics(X) - +- ColumnarExchange (57) - +- VeloxAppendBatches (56) - +- ^ FlushableHashAggregateExecTransformer (54) - +- ^ ProjectExecTransformer (53) - +- ^ FilterExecTransformer (52) - +- ^ Scan parquet (51) + VeloxColumnarToRowExec (65) + +- ^ RegularHashAggregateExecTransformer (63) + +- ^ InputIteratorTransformer (62) + +- ShuffleQueryStage (60), Statistics(X) + +- ColumnarExchange (59) + +- VeloxAppendBatches (58) + +- ^ FlushableHashAggregateExecTransformer (56) + +- ^ ProjectExecTransformer (55) + +- ^ FilterExecTransformer (54) + +- ^ Scan parquet (53) +- == Initial Plan == - HashAggregate (69) - +- Exchange (68) - +- HashAggregate (67) - +- Project (66) - +- Filter (65) - +- Scan parquet (64) + HashAggregate (71) + +- Exchange (70) + +- HashAggregate (69) + +- Project (68) + +- Filter (67) + +- Scan parquet (66) -(51) Scan parquet +(53) Scan parquet Output [2]: [c_phone#X, c_acctbal#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(c_acctbal), GreaterThan(c_acctbal,0.00)] ReadSchema: struct -(52) FilterExecTransformer +(54) FilterExecTransformer Input [2]: [c_phone#X, c_acctbal#X] Arguments: ((isnotnull(c_acctbal#X) AND (c_acctbal#X > 0.00)) AND substring(c_phone#X, 1, 2) IN (13,31,23,29,30,18,17)) -(53) ProjectExecTransformer +(55) ProjectExecTransformer Output [1]: [c_acctbal#X] Input [2]: [c_phone#X, c_acctbal#X] -(54) FlushableHashAggregateExecTransformer +(56) FlushableHashAggregateExecTransformer Input [1]: [c_acctbal#X] Keys: [] Functions [1]: [partial_avg(c_acctbal#X)] Aggregate Attributes [2]: [sum#X, count#X] Results [2]: [sum#X, count#X] -(55) WholeStageCodegenTransformer (X) +(57) WholeStageCodegenTransformer (X) Input [2]: [sum#X, count#X] Arguments: false -(56) VeloxAppendBatches +(58) VeloxAppendBatches Input [2]: [sum#X, count#X] Arguments: X -(57) ColumnarExchange +(59) ColumnarExchange Input [2]: [sum#X, count#X] Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=X], [id=#X] -(58) ShuffleQueryStage +(60) ShuffleQueryStage Output [2]: [sum#X, count#X] Arguments: X -(59) InputAdapter +(61) InputAdapter Input [2]: [sum#X, count#X] -(60) InputIteratorTransformer +(62) InputIteratorTransformer Input [2]: [sum#X, count#X] -(61) RegularHashAggregateExecTransformer +(63) RegularHashAggregateExecTransformer Input [2]: [sum#X, count#X] Keys: [] Functions [1]: [avg(c_acctbal#X)] Aggregate Attributes [1]: [avg(c_acctbal#X)#X] Results [1]: [avg(c_acctbal#X)#X AS avg(c_acctbal)#X] -(62) WholeStageCodegenTransformer (X) +(64) WholeStageCodegenTransformer (X) Input [1]: [avg(c_acctbal)#X] Arguments: false -(63) VeloxColumnarToRowExec +(65) VeloxColumnarToRowExec Input [1]: [avg(c_acctbal)#X] -(64) Scan parquet +(66) Scan parquet Output [2]: [c_phone#X, c_acctbal#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(c_acctbal), GreaterThan(c_acctbal,0.00)] ReadSchema: struct -(65) Filter +(67) Filter Input [2]: [c_phone#X, c_acctbal#X] Condition : ((isnotnull(c_acctbal#X) AND (c_acctbal#X > 0.00)) AND substring(c_phone#X, 1, 2) IN (13,31,23,29,30,18,17)) -(66) Project +(68) Project Output [1]: [c_acctbal#X] Input [2]: [c_phone#X, c_acctbal#X] -(67) HashAggregate +(69) HashAggregate Input [1]: [c_acctbal#X] Keys: [] Functions [1]: [partial_avg(c_acctbal#X)] Aggregate Attributes [2]: [sum#X, count#X] Results [2]: [sum#X, count#X] -(68) Exchange +(70) Exchange Input [2]: [sum#X, count#X] Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=X] -(69) HashAggregate +(71) HashAggregate Input [2]: [sum#X, count#X] Keys: [] Functions [1]: [avg(c_acctbal#X)] Aggregate Attributes [1]: [avg(c_acctbal#X)#X] Results [1]: [avg(c_acctbal#X)#X AS avg(c_acctbal)#X] -(70) AdaptiveSparkPlan +(72) AdaptiveSparkPlan Output [1]: [avg(c_acctbal)#X] Arguments: isFinalPlan=true Subquery:2 Hosting operator id = 1 Hosting Expression = Subquery subquery#X, [id=#X] -AdaptiveSparkPlan (70) +AdaptiveSparkPlan (72) +- == Final Plan == - VeloxColumnarToRowExec (63) - +- ^ RegularHashAggregateExecTransformer (61) - +- ^ InputIteratorTransformer (60) - +- ShuffleQueryStage (58), Statistics(X) - +- ColumnarExchange (57) - +- VeloxAppendBatches (56) - +- ^ FlushableHashAggregateExecTransformer (54) - +- ^ ProjectExecTransformer (53) - +- ^ FilterExecTransformer (52) - +- ^ Scan parquet (51) + VeloxColumnarToRowExec (65) + +- ^ RegularHashAggregateExecTransformer (63) + +- ^ InputIteratorTransformer (62) + +- ShuffleQueryStage (60), Statistics(X) + +- ColumnarExchange (59) + +- VeloxAppendBatches (58) + +- ^ FlushableHashAggregateExecTransformer (56) + +- ^ ProjectExecTransformer (55) + +- ^ FilterExecTransformer (54) + +- ^ Scan parquet (53) +- == Initial Plan == - HashAggregate (69) - +- Exchange (68) - +- HashAggregate (67) - +- Project (66) - +- Filter (65) - +- Scan parquet (64) \ No newline at end of file + HashAggregate (71) + +- Exchange (70) + +- HashAggregate (69) + +- Project (68) + +- Filter (67) + +- Scan parquet (66) \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/3.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/3.txt index 709a1700b5c7..50ad3b59c347 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/3.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/3.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (63) +AdaptiveSparkPlan (67) +- == Final Plan == VeloxColumnarToRowExec (43) +- TakeOrderedAndProjectExecTransformer (42) @@ -36,25 +36,29 @@ AdaptiveSparkPlan (63) +- ^ FilterExecTransformer (28) +- ^ Scan parquet (27) +- == Initial Plan == - TakeOrderedAndProject (62) - +- HashAggregate (61) - +- HashAggregate (60) - +- Project (59) - +- ShuffledHashJoin Inner BuildRight (58) - :- Exchange (53) - : +- Project (52) - : +- ShuffledHashJoin Inner BuildLeft (51) - : :- Exchange (47) - : : +- Project (46) - : : +- Filter (45) - : : +- Scan parquet (44) - : +- Exchange (50) - : +- Filter (49) - : +- Scan parquet (48) - +- Exchange (57) - +- Project (56) - +- Filter (55) - +- Scan parquet (54) + TakeOrderedAndProject (66) + +- HashAggregate (65) + +- HashAggregate (64) + +- Project (63) + +- SortMergeJoin Inner (62) + :- Sort (56) + : +- Exchange (55) + : +- Project (54) + : +- SortMergeJoin Inner (53) + : :- Sort (48) + : : +- Exchange (47) + : : +- Project (46) + : : +- Filter (45) + : : +- Scan parquet (44) + : +- Sort (52) + : +- Exchange (51) + : +- Filter (50) + : +- Scan parquet (49) + +- Sort (61) + +- Exchange (60) + +- Project (59) + +- Filter (58) + +- Scan parquet (57) (1) Scan parquet @@ -258,82 +262,98 @@ Input [2]: [c_custkey#X, c_mktsegment#X] Input [1]: [c_custkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(48) Scan parquet +(48) Sort +Input [1]: [c_custkey#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(49) Scan parquet Output [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderdate), LessThan(o_orderdate,1995-03-15), IsNotNull(o_custkey), IsNotNull(o_orderkey)] ReadSchema: struct -(49) Filter +(50) Filter Input [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Condition : (((isnotnull(o_orderdate#X) AND (o_orderdate#X < 1995-03-15)) AND isnotnull(o_custkey#X)) AND isnotnull(o_orderkey#X)) -(50) Exchange +(51) Exchange Input [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(51) ShuffledHashJoin +(52) Sort +Input [4]: [o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(53) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join type: Inner Join condition: None -(52) Project +(54) Project Output [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] Input [5]: [c_custkey#X, o_orderkey#X, o_custkey#X, o_orderdate#X, o_shippriority#X] -(53) Exchange +(55) Exchange Input [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(54) Scan parquet +(56) Sort +Input [3]: [o_orderkey#X, o_orderdate#X, o_shippriority#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(57) Scan parquet Output [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThan(l_shipdate,1995-03-15), IsNotNull(l_orderkey)] ReadSchema: struct -(55) Filter +(58) Filter Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : ((isnotnull(l_shipdate#X) AND (l_shipdate#X > 1995-03-15)) AND isnotnull(l_orderkey#X)) -(56) Project +(59) Project Output [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(57) Exchange +(60) Exchange Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(58) ShuffledHashJoin +(61) Sort +Input [3]: [l_orderkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(62) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join type: Inner Join condition: None -(59) Project +(63) Project Output [5]: [o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] Input [6]: [o_orderkey#X, o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] -(60) HashAggregate +(64) HashAggregate Input [5]: [o_orderdate#X, o_shippriority#X, l_orderkey#X, l_extendedprice#X, l_discount#X] Keys [3]: [l_orderkey#X, o_orderdate#X, o_shippriority#X] Functions [1]: [partial_sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [5]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] -(61) HashAggregate +(65) HashAggregate Input [5]: [l_orderkey#X, o_orderdate#X, o_shippriority#X, sum#X, isEmpty#X] Keys [3]: [l_orderkey#X, o_orderdate#X, o_shippriority#X] Functions [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X] Results [4]: [l_orderkey#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X AS revenue#X, o_orderdate#X, o_shippriority#X] -(62) TakeOrderedAndProject +(66) TakeOrderedAndProject Input [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] Arguments: X, [revenue#X DESC NULLS LAST, o_orderdate#X ASC NULLS FIRST], [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] -(63) AdaptiveSparkPlan +(67) AdaptiveSparkPlan Output [4]: [l_orderkey#X, revenue#X, o_orderdate#X, o_shippriority#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/4.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/4.txt index a82dbf288086..2db46753e9fc 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/4.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/4.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (54) +AdaptiveSparkPlan (56) +- == Final Plan == VeloxColumnarToRowExec (38) +- ^ SortExecTransformer (36) @@ -31,21 +31,23 @@ AdaptiveSparkPlan (54) +- ^ FilterExecTransformer (11) +- ^ Scan parquet (10) +- == Initial Plan == - Sort (53) - +- Exchange (52) - +- HashAggregate (51) - +- Exchange (50) - +- HashAggregate (49) - +- Project (48) - +- ShuffledHashJoin LeftSemi BuildRight (47) - :- Exchange (42) - : +- Project (41) - : +- Filter (40) - : +- Scan parquet (39) - +- Exchange (46) - +- Project (45) - +- Filter (44) - +- Scan parquet (43) + Sort (55) + +- Exchange (54) + +- HashAggregate (53) + +- Exchange (52) + +- HashAggregate (51) + +- Project (50) + +- SortMergeJoin LeftSemi (49) + :- Sort (43) + : +- Exchange (42) + : +- Project (41) + : +- Filter (40) + : +- Scan parquet (39) + +- Sort (48) + +- Exchange (47) + +- Project (46) + +- Filter (45) + +- Scan parquet (44) (1) Scan parquet @@ -224,61 +226,69 @@ Input [3]: [o_orderkey#X, o_orderdate#X, o_orderpriority#X] Input [2]: [o_orderkey#X, o_orderpriority#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(43) Scan parquet +(43) Sort +Input [2]: [o_orderkey#X, o_orderpriority#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(44) Scan parquet Output [3]: [l_orderkey#X, l_commitdate#X, l_receiptdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_commitdate), IsNotNull(l_receiptdate)] ReadSchema: struct -(44) Filter +(45) Filter Input [3]: [l_orderkey#X, l_commitdate#X, l_receiptdate#X] Condition : ((isnotnull(l_commitdate#X) AND isnotnull(l_receiptdate#X)) AND (l_commitdate#X < l_receiptdate#X)) -(45) Project +(46) Project Output [1]: [l_orderkey#X] Input [3]: [l_orderkey#X, l_commitdate#X, l_receiptdate#X] -(46) Exchange +(47) Exchange Input [1]: [l_orderkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(47) ShuffledHashJoin +(48) Sort +Input [1]: [l_orderkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(49) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join type: LeftSemi Join condition: None -(48) Project +(50) Project Output [1]: [o_orderpriority#X] Input [2]: [o_orderkey#X, o_orderpriority#X] -(49) HashAggregate +(51) HashAggregate Input [1]: [o_orderpriority#X] Keys [1]: [o_orderpriority#X] Functions [1]: [partial_count(1)] Aggregate Attributes [1]: [count#X] Results [2]: [o_orderpriority#X, count#X] -(50) Exchange +(52) Exchange Input [2]: [o_orderpriority#X, count#X] Arguments: hashpartitioning(o_orderpriority#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(51) HashAggregate +(53) HashAggregate Input [2]: [o_orderpriority#X, count#X] Keys [1]: [o_orderpriority#X] Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#X] Results [2]: [o_orderpriority#X, count(1)#X AS order_count#X] -(52) Exchange +(54) Exchange Input [2]: [o_orderpriority#X, order_count#X] Arguments: rangepartitioning(o_orderpriority#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(53) Sort +(55) Sort Input [2]: [o_orderpriority#X, order_count#X] Arguments: [o_orderpriority#X ASC NULLS FIRST], true, 0 -(54) AdaptiveSparkPlan +(56) AdaptiveSparkPlan Output [2]: [o_orderpriority#X, order_count#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/5.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/5.txt index f20a52b91a8f..07a5c86709f4 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/5.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/5.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (146) +AdaptiveSparkPlan (156) +- == Final Plan == VeloxColumnarToRowExec (106) +- ^ SortExecTransformer (104) @@ -83,45 +83,55 @@ AdaptiveSparkPlan (146) +- ^ FilterExecTransformer (79) +- ^ Scan parquet (78) +- == Initial Plan == - Sort (145) - +- Exchange (144) - +- HashAggregate (143) - +- Exchange (142) - +- HashAggregate (141) - +- Project (140) - +- ShuffledHashJoin Inner BuildRight (139) - :- Exchange (134) - : +- Project (133) - : +- ShuffledHashJoin Inner BuildRight (132) - : :- Exchange (128) - : : +- Project (127) - : : +- ShuffledHashJoin Inner BuildRight (126) - : : :- Exchange (122) - : : : +- Project (121) - : : : +- ShuffledHashJoin Inner BuildRight (120) - : : : :- Exchange (116) - : : : : +- Project (115) - : : : : +- ShuffledHashJoin Inner BuildLeft (114) - : : : : :- Exchange (109) - : : : : : +- Filter (108) - : : : : : +- Scan parquet (107) - : : : : +- Exchange (113) - : : : : +- Project (112) - : : : : +- Filter (111) - : : : : +- Scan parquet (110) - : : : +- Exchange (119) - : : : +- Filter (118) - : : : +- Scan parquet (117) - : : +- Exchange (125) - : : +- Filter (124) - : : +- Scan parquet (123) - : +- Exchange (131) - : +- Filter (130) - : +- Scan parquet (129) - +- Exchange (138) - +- Project (137) - +- Filter (136) - +- Scan parquet (135) + Sort (155) + +- Exchange (154) + +- HashAggregate (153) + +- Exchange (152) + +- HashAggregate (151) + +- Project (150) + +- SortMergeJoin Inner (149) + :- Sort (143) + : +- Exchange (142) + : +- Project (141) + : +- SortMergeJoin Inner (140) + : :- Sort (135) + : : +- Exchange (134) + : : +- Project (133) + : : +- SortMergeJoin Inner (132) + : : :- Sort (127) + : : : +- Exchange (126) + : : : +- Project (125) + : : : +- SortMergeJoin Inner (124) + : : : :- Sort (119) + : : : : +- Exchange (118) + : : : : +- Project (117) + : : : : +- SortMergeJoin Inner (116) + : : : : :- Sort (110) + : : : : : +- Exchange (109) + : : : : : +- Filter (108) + : : : : : +- Scan parquet (107) + : : : : +- Sort (115) + : : : : +- Exchange (114) + : : : : +- Project (113) + : : : : +- Filter (112) + : : : : +- Scan parquet (111) + : : : +- Sort (123) + : : : +- Exchange (122) + : : : +- Filter (121) + : : : +- Scan parquet (120) + : : +- Sort (131) + : : +- Exchange (130) + : : +- Filter (129) + : : +- Scan parquet (128) + : +- Sort (139) + : +- Exchange (138) + : +- Filter (137) + : +- Scan parquet (136) + +- Sort (148) + +- Exchange (147) + +- Project (146) + +- Filter (145) + +- Scan parquet (144) (1) Scan parquet @@ -572,181 +582,221 @@ Condition : (isnotnull(c_custkey#X) AND isnotnull(c_nationkey#X)) Input [2]: [c_custkey#X, c_nationkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(110) Scan parquet +(110) Sort +Input [2]: [c_custkey#X, c_nationkey#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(111) Scan parquet Output [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderdate), GreaterThanOrEqual(o_orderdate,1994-01-01), LessThan(o_orderdate,1995-01-01), IsNotNull(o_custkey), IsNotNull(o_orderkey)] ReadSchema: struct -(111) Filter +(112) Filter Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Condition : ((((isnotnull(o_orderdate#X) AND (o_orderdate#X >= 1994-01-01)) AND (o_orderdate#X < 1995-01-01)) AND isnotnull(o_custkey#X)) AND isnotnull(o_orderkey#X)) -(112) Project +(113) Project Output [2]: [o_orderkey#X, o_custkey#X] Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] -(113) Exchange +(114) Exchange Input [2]: [o_orderkey#X, o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(114) ShuffledHashJoin +(115) Sort +Input [2]: [o_orderkey#X, o_custkey#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(116) SortMergeJoin Left keys [1]: [c_custkey#X] Right keys [1]: [o_custkey#X] Join type: Inner Join condition: None -(115) Project +(117) Project Output [2]: [c_nationkey#X, o_orderkey#X] Input [4]: [c_custkey#X, c_nationkey#X, o_orderkey#X, o_custkey#X] -(116) Exchange +(118) Exchange Input [2]: [c_nationkey#X, o_orderkey#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(117) Scan parquet +(119) Sort +Input [2]: [c_nationkey#X, o_orderkey#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(120) Scan parquet Output [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_orderkey), IsNotNull(l_suppkey)] ReadSchema: struct -(118) Filter +(121) Filter Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Condition : (isnotnull(l_orderkey#X) AND isnotnull(l_suppkey#X)) -(119) Exchange +(122) Exchange Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(120) ShuffledHashJoin +(123) Sort +Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(124) SortMergeJoin Left keys [1]: [o_orderkey#X] Right keys [1]: [l_orderkey#X] Join type: Inner Join condition: None -(121) Project +(125) Project Output [4]: [c_nationkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Input [6]: [c_nationkey#X, o_orderkey#X, l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] -(122) Exchange +(126) Exchange Input [4]: [c_nationkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_suppkey#X, c_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(123) Scan parquet +(127) Sort +Input [4]: [c_nationkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_suppkey#X ASC NULLS FIRST, c_nationkey#X ASC NULLS FIRST], false, 0 + +(128) Scan parquet Output [2]: [s_suppkey#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct -(124) Filter +(129) Filter Input [2]: [s_suppkey#X, s_nationkey#X] Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) -(125) Exchange +(130) Exchange Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(126) ShuffledHashJoin +(131) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST, s_nationkey#X ASC NULLS FIRST], false, 0 + +(132) SortMergeJoin Left keys [2]: [l_suppkey#X, c_nationkey#X] Right keys [2]: [s_suppkey#X, s_nationkey#X] Join type: Inner Join condition: None -(127) Project +(133) Project Output [3]: [l_extendedprice#X, l_discount#X, s_nationkey#X] Input [6]: [c_nationkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, s_suppkey#X, s_nationkey#X] -(128) Exchange +(134) Exchange Input [3]: [l_extendedprice#X, l_discount#X, s_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(129) Scan parquet +(135) Sort +Input [3]: [l_extendedprice#X, l_discount#X, s_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(136) Scan parquet Output [3]: [n_nationkey#X, n_name#X, n_regionkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey), IsNotNull(n_regionkey)] ReadSchema: struct -(130) Filter +(137) Filter Input [3]: [n_nationkey#X, n_name#X, n_regionkey#X] Condition : (isnotnull(n_nationkey#X) AND isnotnull(n_regionkey#X)) -(131) Exchange +(138) Exchange Input [3]: [n_nationkey#X, n_name#X, n_regionkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(132) ShuffledHashJoin +(139) Sort +Input [3]: [n_nationkey#X, n_name#X, n_regionkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(140) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join type: Inner Join condition: None -(133) Project +(141) Project Output [4]: [l_extendedprice#X, l_discount#X, n_name#X, n_regionkey#X] Input [6]: [l_extendedprice#X, l_discount#X, s_nationkey#X, n_nationkey#X, n_name#X, n_regionkey#X] -(134) Exchange +(142) Exchange Input [4]: [l_extendedprice#X, l_discount#X, n_name#X, n_regionkey#X] Arguments: hashpartitioning(n_regionkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(135) Scan parquet +(143) Sort +Input [4]: [l_extendedprice#X, l_discount#X, n_name#X, n_regionkey#X] +Arguments: [n_regionkey#X ASC NULLS FIRST], false, 0 + +(144) Scan parquet Output [2]: [r_regionkey#X, r_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(r_name), EqualTo(r_name,ASIA), IsNotNull(r_regionkey)] ReadSchema: struct -(136) Filter +(145) Filter Input [2]: [r_regionkey#X, r_name#X] Condition : ((isnotnull(r_name#X) AND (r_name#X = ASIA)) AND isnotnull(r_regionkey#X)) -(137) Project +(146) Project Output [1]: [r_regionkey#X] Input [2]: [r_regionkey#X, r_name#X] -(138) Exchange +(147) Exchange Input [1]: [r_regionkey#X] Arguments: hashpartitioning(r_regionkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(139) ShuffledHashJoin +(148) Sort +Input [1]: [r_regionkey#X] +Arguments: [r_regionkey#X ASC NULLS FIRST], false, 0 + +(149) SortMergeJoin Left keys [1]: [n_regionkey#X] Right keys [1]: [r_regionkey#X] Join type: Inner Join condition: None -(140) Project +(150) Project Output [3]: [l_extendedprice#X, l_discount#X, n_name#X] Input [5]: [l_extendedprice#X, l_discount#X, n_name#X, n_regionkey#X, r_regionkey#X] -(141) HashAggregate +(151) HashAggregate Input [3]: [l_extendedprice#X, l_discount#X, n_name#X] Keys [1]: [n_name#X] Functions [1]: [partial_sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [3]: [n_name#X, sum#X, isEmpty#X] -(142) Exchange +(152) Exchange Input [3]: [n_name#X, sum#X, isEmpty#X] Arguments: hashpartitioning(n_name#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(143) HashAggregate +(153) HashAggregate Input [3]: [n_name#X, sum#X, isEmpty#X] Keys [1]: [n_name#X] Functions [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))] Aggregate Attributes [1]: [sum((l_extendedprice#X * (1 - l_discount#X)))#X] Results [2]: [n_name#X, sum((l_extendedprice#X * (1 - l_discount#X)))#X AS revenue#X] -(144) Exchange +(154) Exchange Input [2]: [n_name#X, revenue#X] Arguments: rangepartitioning(revenue#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(145) Sort +(155) Sort Input [2]: [n_name#X, revenue#X] Arguments: [revenue#X DESC NULLS LAST], true, 0 -(146) AdaptiveSparkPlan +(156) AdaptiveSparkPlan Output [2]: [n_name#X, revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/7.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/7.txt index 710c6f3ba189..b27398e415d3 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/7.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/7.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (139) +AdaptiveSparkPlan (149) +- == Final Plan == VeloxColumnarToRowExec (101) +- ^ SortExecTransformer (99) @@ -79,43 +79,53 @@ AdaptiveSparkPlan (139) +- ShuffleQueryStage (79), Statistics(X) +- ReusedExchange (78) +- == Initial Plan == - Sort (138) - +- Exchange (137) - +- HashAggregate (136) - +- Exchange (135) - +- HashAggregate (134) - +- Project (133) - +- ShuffledHashJoin Inner BuildRight (132) - :- Exchange (128) - : +- Project (127) - : +- ShuffledHashJoin Inner BuildRight (126) - : :- Exchange (122) - : : +- Project (121) - : : +- ShuffledHashJoin Inner BuildRight (120) - : : :- Exchange (116) - : : : +- Project (115) - : : : +- ShuffledHashJoin Inner BuildRight (114) - : : : :- Exchange (110) - : : : : +- Project (109) - : : : : +- ShuffledHashJoin Inner BuildLeft (108) - : : : : :- Exchange (104) - : : : : : +- Filter (103) - : : : : : +- Scan parquet (102) - : : : : +- Exchange (107) - : : : : +- Filter (106) - : : : : +- Scan parquet (105) - : : : +- Exchange (113) - : : : +- Filter (112) - : : : +- Scan parquet (111) - : : +- Exchange (119) - : : +- Filter (118) - : : +- Scan parquet (117) - : +- Exchange (125) - : +- Filter (124) - : +- Scan parquet (123) - +- Exchange (131) - +- Filter (130) - +- Scan parquet (129) + Sort (148) + +- Exchange (147) + +- HashAggregate (146) + +- Exchange (145) + +- HashAggregate (144) + +- Project (143) + +- SortMergeJoin Inner (142) + :- Sort (137) + : +- Exchange (136) + : +- Project (135) + : +- SortMergeJoin Inner (134) + : :- Sort (129) + : : +- Exchange (128) + : : +- Project (127) + : : +- SortMergeJoin Inner (126) + : : :- Sort (121) + : : : +- Exchange (120) + : : : +- Project (119) + : : : +- SortMergeJoin Inner (118) + : : : :- Sort (113) + : : : : +- Exchange (112) + : : : : +- Project (111) + : : : : +- SortMergeJoin Inner (110) + : : : : :- Sort (105) + : : : : : +- Exchange (104) + : : : : : +- Filter (103) + : : : : : +- Scan parquet (102) + : : : : +- Sort (109) + : : : : +- Exchange (108) + : : : : +- Filter (107) + : : : : +- Scan parquet (106) + : : : +- Sort (117) + : : : +- Exchange (116) + : : : +- Filter (115) + : : : +- Scan parquet (114) + : : +- Sort (125) + : : +- Exchange (124) + : : +- Filter (123) + : : +- Scan parquet (122) + : +- Sort (133) + : +- Exchange (132) + : +- Filter (131) + : +- Scan parquet (130) + +- Sort (141) + +- Exchange (140) + +- Filter (139) + +- Scan parquet (138) (1) Scan parquet @@ -542,173 +552,213 @@ Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(105) Scan parquet +(105) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(106) Scan parquet Output [5]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1995-01-01), LessThanOrEqual(l_shipdate,1996-12-31), IsNotNull(l_suppkey), IsNotNull(l_orderkey)] ReadSchema: struct -(106) Filter +(107) Filter Input [5]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Condition : ((((isnotnull(l_shipdate#X) AND (l_shipdate#X >= 1995-01-01)) AND (l_shipdate#X <= 1996-12-31)) AND isnotnull(l_suppkey#X)) AND isnotnull(l_orderkey#X)) -(107) Exchange +(108) Exchange Input [5]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(108) ShuffledHashJoin +(109) Sort +Input [5]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Arguments: [l_suppkey#X ASC NULLS FIRST], false, 0 + +(110) SortMergeJoin Left keys [1]: [s_suppkey#X] Right keys [1]: [l_suppkey#X] Join type: Inner Join condition: None -(109) Project +(111) Project Output [5]: [s_nationkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Input [7]: [s_suppkey#X, s_nationkey#X, l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] -(110) Exchange +(112) Exchange Input [5]: [s_nationkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(111) Scan parquet +(113) Sort +Input [5]: [s_nationkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(114) Scan parquet Output [2]: [o_orderkey#X, o_custkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderkey), IsNotNull(o_custkey)] ReadSchema: struct -(112) Filter +(115) Filter Input [2]: [o_orderkey#X, o_custkey#X] Condition : (isnotnull(o_orderkey#X) AND isnotnull(o_custkey#X)) -(113) Exchange +(116) Exchange Input [2]: [o_orderkey#X, o_custkey#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(114) ShuffledHashJoin +(117) Sort +Input [2]: [o_orderkey#X, o_custkey#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(118) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [o_orderkey#X] Join type: Inner Join condition: None -(115) Project +(119) Project Output [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_custkey#X] Input [7]: [s_nationkey#X, l_orderkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_orderkey#X, o_custkey#X] -(116) Exchange +(120) Exchange Input [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_custkey#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(117) Scan parquet +(121) Sort +Input [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_custkey#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(122) Scan parquet Output [2]: [c_custkey#X, c_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(c_custkey), IsNotNull(c_nationkey)] ReadSchema: struct -(118) Filter +(123) Filter Input [2]: [c_custkey#X, c_nationkey#X] Condition : (isnotnull(c_custkey#X) AND isnotnull(c_nationkey#X)) -(119) Exchange +(124) Exchange Input [2]: [c_custkey#X, c_nationkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(120) ShuffledHashJoin +(125) Sort +Input [2]: [c_custkey#X, c_nationkey#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(126) SortMergeJoin Left keys [1]: [o_custkey#X] Right keys [1]: [c_custkey#X] Join type: Inner Join condition: None -(121) Project +(127) Project Output [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X] Input [7]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, o_custkey#X, c_custkey#X, c_nationkey#X] -(122) Exchange +(128) Exchange Input [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(123) Scan parquet +(129) Sort +Input [5]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(130) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey), Or(EqualTo(n_name,FRANCE),EqualTo(n_name,GERMANY))] ReadSchema: struct -(124) Filter +(131) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : (isnotnull(n_nationkey#X) AND ((n_name#X = FRANCE) OR (n_name#X = GERMANY))) -(125) Exchange +(132) Exchange Input [2]: [n_nationkey#X, n_name#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(126) ShuffledHashJoin +(133) Sort +Input [2]: [n_nationkey#X, n_name#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(134) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join type: Inner Join condition: None -(127) Project +(135) Project Output [5]: [l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_name#X] Input [7]: [s_nationkey#X, l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_nationkey#X, n_name#X] -(128) Exchange +(136) Exchange Input [5]: [l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_name#X] Arguments: hashpartitioning(c_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(129) Scan parquet +(137) Sort +Input [5]: [l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_name#X] +Arguments: [c_nationkey#X ASC NULLS FIRST], false, 0 + +(138) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey), Or(EqualTo(n_name,GERMANY),EqualTo(n_name,FRANCE))] ReadSchema: struct -(130) Filter +(139) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : (isnotnull(n_nationkey#X) AND ((n_name#X = GERMANY) OR (n_name#X = FRANCE))) -(131) Exchange +(140) Exchange Input [2]: [n_nationkey#X, n_name#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(132) ShuffledHashJoin +(141) Sort +Input [2]: [n_nationkey#X, n_name#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(142) SortMergeJoin Left keys [1]: [c_nationkey#X] Right keys [1]: [n_nationkey#X] Join type: Inner Join condition: (((n_name#X = FRANCE) AND (n_name#X = GERMANY)) OR ((n_name#X = GERMANY) AND (n_name#X = FRANCE))) -(133) Project +(143) Project Output [4]: [n_name#X AS supp_nation#X, n_name#X AS cust_nation#X, year(l_shipdate#X) AS l_year#X, (l_extendedprice#X * (1 - l_discount#X)) AS volume#X] Input [7]: [l_extendedprice#X, l_discount#X, l_shipdate#X, c_nationkey#X, n_name#X, n_nationkey#X, n_name#X] -(134) HashAggregate +(144) HashAggregate Input [4]: [supp_nation#X, cust_nation#X, l_year#X, volume#X] Keys [3]: [supp_nation#X, cust_nation#X, l_year#X] Functions [1]: [partial_sum(volume#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [5]: [supp_nation#X, cust_nation#X, l_year#X, sum#X, isEmpty#X] -(135) Exchange +(145) Exchange Input [5]: [supp_nation#X, cust_nation#X, l_year#X, sum#X, isEmpty#X] Arguments: hashpartitioning(supp_nation#X, cust_nation#X, l_year#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(136) HashAggregate +(146) HashAggregate Input [5]: [supp_nation#X, cust_nation#X, l_year#X, sum#X, isEmpty#X] Keys [3]: [supp_nation#X, cust_nation#X, l_year#X] Functions [1]: [sum(volume#X)] Aggregate Attributes [1]: [sum(volume#X)#X] Results [4]: [supp_nation#X, cust_nation#X, l_year#X, sum(volume#X)#X AS revenue#X] -(137) Exchange +(147) Exchange Input [4]: [supp_nation#X, cust_nation#X, l_year#X, revenue#X] Arguments: rangepartitioning(supp_nation#X ASC NULLS FIRST, cust_nation#X ASC NULLS FIRST, l_year#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(138) Sort +(148) Sort Input [4]: [supp_nation#X, cust_nation#X, l_year#X, revenue#X] Arguments: [supp_nation#X ASC NULLS FIRST, cust_nation#X ASC NULLS FIRST, l_year#X ASC NULLS FIRST], true, 0 -(139) AdaptiveSparkPlan +(149) AdaptiveSparkPlan Output [4]: [supp_nation#X, cust_nation#X, l_year#X, revenue#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/8.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/8.txt index 953a3a8f0a7c..fa2a2789f4d3 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/8.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/8.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (193) +AdaptiveSparkPlan (207) +- == Final Plan == VeloxColumnarToRowExec (141) +- ^ SortExecTransformer (139) @@ -110,57 +110,71 @@ AdaptiveSparkPlan (193) +- ^ FilterExecTransformer (113) +- ^ Scan parquet (112) +- == Initial Plan == - Sort (192) - +- Exchange (191) - +- HashAggregate (190) - +- Exchange (189) - +- HashAggregate (188) - +- Project (187) - +- ShuffledHashJoin Inner BuildRight (186) - :- Exchange (181) - : +- Project (180) - : +- ShuffledHashJoin Inner BuildRight (179) - : :- Exchange (175) - : : +- Project (174) - : : +- ShuffledHashJoin Inner BuildRight (173) - : : :- Exchange (169) - : : : +- Project (168) - : : : +- ShuffledHashJoin Inner BuildRight (167) - : : : :- Exchange (163) - : : : : +- Project (162) - : : : : +- ShuffledHashJoin Inner BuildRight (161) - : : : : :- Exchange (157) - : : : : : +- Project (156) - : : : : : +- ShuffledHashJoin Inner BuildRight (155) - : : : : : :- Exchange (151) - : : : : : : +- Project (150) - : : : : : : +- ShuffledHashJoin Inner BuildLeft (149) - : : : : : : :- Exchange (145) - : : : : : : : +- Project (144) - : : : : : : : +- Filter (143) - : : : : : : : +- Scan parquet (142) - : : : : : : +- Exchange (148) - : : : : : : +- Filter (147) - : : : : : : +- Scan parquet (146) - : : : : : +- Exchange (154) - : : : : : +- Filter (153) - : : : : : +- Scan parquet (152) - : : : : +- Exchange (160) - : : : : +- Filter (159) - : : : : +- Scan parquet (158) - : : : +- Exchange (166) - : : : +- Filter (165) - : : : +- Scan parquet (164) - : : +- Exchange (172) - : : +- Filter (171) - : : +- Scan parquet (170) - : +- Exchange (178) - : +- Filter (177) - : +- Scan parquet (176) - +- Exchange (185) - +- Project (184) - +- Filter (183) - +- Scan parquet (182) + Sort (206) + +- Exchange (205) + +- HashAggregate (204) + +- Exchange (203) + +- HashAggregate (202) + +- Project (201) + +- SortMergeJoin Inner (200) + :- Sort (194) + : +- Exchange (193) + : +- Project (192) + : +- SortMergeJoin Inner (191) + : :- Sort (186) + : : +- Exchange (185) + : : +- Project (184) + : : +- SortMergeJoin Inner (183) + : : :- Sort (178) + : : : +- Exchange (177) + : : : +- Project (176) + : : : +- SortMergeJoin Inner (175) + : : : :- Sort (170) + : : : : +- Exchange (169) + : : : : +- Project (168) + : : : : +- SortMergeJoin Inner (167) + : : : : :- Sort (162) + : : : : : +- Exchange (161) + : : : : : +- Project (160) + : : : : : +- SortMergeJoin Inner (159) + : : : : : :- Sort (154) + : : : : : : +- Exchange (153) + : : : : : : +- Project (152) + : : : : : : +- SortMergeJoin Inner (151) + : : : : : : :- Sort (146) + : : : : : : : +- Exchange (145) + : : : : : : : +- Project (144) + : : : : : : : +- Filter (143) + : : : : : : : +- Scan parquet (142) + : : : : : : +- Sort (150) + : : : : : : +- Exchange (149) + : : : : : : +- Filter (148) + : : : : : : +- Scan parquet (147) + : : : : : +- Sort (158) + : : : : : +- Exchange (157) + : : : : : +- Filter (156) + : : : : : +- Scan parquet (155) + : : : : +- Sort (166) + : : : : +- Exchange (165) + : : : : +- Filter (164) + : : : : +- Scan parquet (163) + : : : +- Sort (174) + : : : +- Exchange (173) + : : : +- Filter (172) + : : : +- Scan parquet (171) + : : +- Sort (182) + : : +- Exchange (181) + : : +- Filter (180) + : : +- Scan parquet (179) + : +- Sort (190) + : +- Exchange (189) + : +- Filter (188) + : +- Scan parquet (187) + +- Sort (199) + +- Exchange (198) + +- Project (197) + +- Filter (196) + +- Scan parquet (195) (1) Scan parquet @@ -757,235 +771,291 @@ Input [2]: [p_partkey#X, p_type#X] Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(146) Scan parquet +(146) Sort +Input [1]: [p_partkey#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(147) Scan parquet Output [5]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_partkey), IsNotNull(l_suppkey), IsNotNull(l_orderkey)] ReadSchema: struct -(147) Filter +(148) Filter Input [5]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Condition : ((isnotnull(l_partkey#X) AND isnotnull(l_suppkey#X)) AND isnotnull(l_orderkey#X)) -(148) Exchange +(149) Exchange Input [5]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(149) ShuffledHashJoin +(150) Sort +Input [5]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(151) SortMergeJoin Left keys [1]: [p_partkey#X] Right keys [1]: [l_partkey#X] Join type: Inner Join condition: None -(150) Project +(152) Project Output [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Input [6]: [p_partkey#X, l_orderkey#X, l_partkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] -(151) Exchange +(153) Exchange Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(152) Scan parquet +(154) Sort +Input [4]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X] +Arguments: [l_suppkey#X ASC NULLS FIRST], false, 0 + +(155) Scan parquet Output [2]: [s_suppkey#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct -(153) Filter +(156) Filter Input [2]: [s_suppkey#X, s_nationkey#X] Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) -(154) Exchange +(157) Exchange Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(155) ShuffledHashJoin +(158) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(159) SortMergeJoin Left keys [1]: [l_suppkey#X] Right keys [1]: [s_suppkey#X] Join type: Inner Join condition: None -(156) Project +(160) Project Output [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, s_nationkey#X] Input [6]: [l_orderkey#X, l_suppkey#X, l_extendedprice#X, l_discount#X, s_suppkey#X, s_nationkey#X] -(157) Exchange +(161) Exchange Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, s_nationkey#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(158) Scan parquet +(162) Sort +Input [4]: [l_orderkey#X, l_extendedprice#X, l_discount#X, s_nationkey#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(163) Scan parquet Output [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderdate), GreaterThanOrEqual(o_orderdate,1995-01-01), LessThanOrEqual(o_orderdate,1996-12-31), IsNotNull(o_orderkey), IsNotNull(o_custkey)] ReadSchema: struct -(159) Filter +(164) Filter Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Condition : ((((isnotnull(o_orderdate#X) AND (o_orderdate#X >= 1995-01-01)) AND (o_orderdate#X <= 1996-12-31)) AND isnotnull(o_orderkey#X)) AND isnotnull(o_custkey#X)) -(160) Exchange +(165) Exchange Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(161) ShuffledHashJoin +(166) Sort +Input [3]: [o_orderkey#X, o_custkey#X, o_orderdate#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(167) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [o_orderkey#X] Join type: Inner Join condition: None -(162) Project +(168) Project Output [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_custkey#X, o_orderdate#X] Input [7]: [l_orderkey#X, l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderkey#X, o_custkey#X, o_orderdate#X] -(163) Exchange +(169) Exchange Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_custkey#X, o_orderdate#X] Arguments: hashpartitioning(o_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(164) Scan parquet +(170) Sort +Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_custkey#X, o_orderdate#X] +Arguments: [o_custkey#X ASC NULLS FIRST], false, 0 + +(171) Scan parquet Output [2]: [c_custkey#X, c_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(c_custkey), IsNotNull(c_nationkey)] ReadSchema: struct -(165) Filter +(172) Filter Input [2]: [c_custkey#X, c_nationkey#X] Condition : (isnotnull(c_custkey#X) AND isnotnull(c_nationkey#X)) -(166) Exchange +(173) Exchange Input [2]: [c_custkey#X, c_nationkey#X] Arguments: hashpartitioning(c_custkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(167) ShuffledHashJoin +(174) Sort +Input [2]: [c_custkey#X, c_nationkey#X] +Arguments: [c_custkey#X ASC NULLS FIRST], false, 0 + +(175) SortMergeJoin Left keys [1]: [o_custkey#X] Right keys [1]: [c_custkey#X] Join type: Inner Join condition: None -(168) Project +(176) Project Output [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, c_nationkey#X] Input [7]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_custkey#X, o_orderdate#X, c_custkey#X, c_nationkey#X] -(169) Exchange +(177) Exchange Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, c_nationkey#X] Arguments: hashpartitioning(c_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(170) Scan parquet +(178) Sort +Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, c_nationkey#X] +Arguments: [c_nationkey#X ASC NULLS FIRST], false, 0 + +(179) Scan parquet Output [2]: [n_nationkey#X, n_regionkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey), IsNotNull(n_regionkey)] ReadSchema: struct -(171) Filter +(180) Filter Input [2]: [n_nationkey#X, n_regionkey#X] Condition : (isnotnull(n_nationkey#X) AND isnotnull(n_regionkey#X)) -(172) Exchange +(181) Exchange Input [2]: [n_nationkey#X, n_regionkey#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(173) ShuffledHashJoin +(182) Sort +Input [2]: [n_nationkey#X, n_regionkey#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(183) SortMergeJoin Left keys [1]: [c_nationkey#X] Right keys [1]: [n_nationkey#X] Join type: Inner Join condition: None -(174) Project +(184) Project Output [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, n_regionkey#X] Input [7]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, c_nationkey#X, n_nationkey#X, n_regionkey#X] -(175) Exchange +(185) Exchange Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, n_regionkey#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(176) Scan parquet +(186) Sort +Input [5]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, n_regionkey#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(187) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey)] ReadSchema: struct -(177) Filter +(188) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : isnotnull(n_nationkey#X) -(178) Exchange +(189) Exchange Input [2]: [n_nationkey#X, n_name#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(179) ShuffledHashJoin +(190) Sort +Input [2]: [n_nationkey#X, n_name#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(191) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join type: Inner Join condition: None -(180) Project +(192) Project Output [5]: [l_extendedprice#X, l_discount#X, o_orderdate#X, n_regionkey#X, n_name#X] Input [7]: [l_extendedprice#X, l_discount#X, s_nationkey#X, o_orderdate#X, n_regionkey#X, n_nationkey#X, n_name#X] -(181) Exchange +(193) Exchange Input [5]: [l_extendedprice#X, l_discount#X, o_orderdate#X, n_regionkey#X, n_name#X] Arguments: hashpartitioning(n_regionkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(182) Scan parquet +(194) Sort +Input [5]: [l_extendedprice#X, l_discount#X, o_orderdate#X, n_regionkey#X, n_name#X] +Arguments: [n_regionkey#X ASC NULLS FIRST], false, 0 + +(195) Scan parquet Output [2]: [r_regionkey#X, r_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(r_name), EqualTo(r_name,AMERICA), IsNotNull(r_regionkey)] ReadSchema: struct -(183) Filter +(196) Filter Input [2]: [r_regionkey#X, r_name#X] Condition : ((isnotnull(r_name#X) AND (r_name#X = AMERICA)) AND isnotnull(r_regionkey#X)) -(184) Project +(197) Project Output [1]: [r_regionkey#X] Input [2]: [r_regionkey#X, r_name#X] -(185) Exchange +(198) Exchange Input [1]: [r_regionkey#X] Arguments: hashpartitioning(r_regionkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(186) ShuffledHashJoin +(199) Sort +Input [1]: [r_regionkey#X] +Arguments: [r_regionkey#X ASC NULLS FIRST], false, 0 + +(200) SortMergeJoin Left keys [1]: [n_regionkey#X] Right keys [1]: [r_regionkey#X] Join type: Inner Join condition: None -(187) Project +(201) Project Output [3]: [year(o_orderdate#X) AS o_year#X, (l_extendedprice#X * (1 - l_discount#X)) AS volume#X, n_name#X AS nation#X] Input [6]: [l_extendedprice#X, l_discount#X, o_orderdate#X, n_regionkey#X, n_name#X, r_regionkey#X] -(188) HashAggregate +(202) HashAggregate Input [3]: [o_year#X, volume#X, nation#X] Keys [1]: [o_year#X] Functions [2]: [partial_sum(CASE WHEN (nation#X = BRAZIL) THEN volume#X ELSE 0.0000 END), partial_sum(volume#X)] Aggregate Attributes [4]: [sum#X, isEmpty#X, sum#X, isEmpty#X] Results [5]: [o_year#X, sum#X, isEmpty#X, sum#X, isEmpty#X] -(189) Exchange +(203) Exchange Input [5]: [o_year#X, sum#X, isEmpty#X, sum#X, isEmpty#X] Arguments: hashpartitioning(o_year#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(190) HashAggregate +(204) HashAggregate Input [5]: [o_year#X, sum#X, isEmpty#X, sum#X, isEmpty#X] Keys [1]: [o_year#X] Functions [2]: [sum(CASE WHEN (nation#X = BRAZIL) THEN volume#X ELSE 0.0000 END), sum(volume#X)] Aggregate Attributes [2]: [sum(CASE WHEN (nation#X = BRAZIL) THEN volume#X ELSE 0.0000 END)#X, sum(volume#X)#X] Results [2]: [o_year#X, (sum(CASE WHEN (nation#X = BRAZIL) THEN volume#X ELSE 0.0000 END)#X / sum(volume#X)#X) AS mkt_share#X] -(191) Exchange +(205) Exchange Input [2]: [o_year#X, mkt_share#X] Arguments: rangepartitioning(o_year#X ASC NULLS FIRST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(192) Sort +(206) Sort Input [2]: [o_year#X, mkt_share#X] Arguments: [o_year#X ASC NULLS FIRST], true, 0 -(193) AdaptiveSparkPlan +(207) AdaptiveSparkPlan Output [2]: [o_year#X, mkt_share#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/9.txt b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/9.txt index 492ff1aeadd0..3000cbae7a6d 100644 --- a/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/9.txt +++ b/backends-velox/src/test/resources/tpch-approved-plan/v1/spark34/9.txt @@ -1,5 +1,5 @@ == Physical Plan == -AdaptiveSparkPlan (145) +AdaptiveSparkPlan (155) +- == Final Plan == VeloxColumnarToRowExec (106) +- ^ SortExecTransformer (104) @@ -83,44 +83,54 @@ AdaptiveSparkPlan (145) +- ^ FilterExecTransformer (79) +- ^ Scan parquet (78) +- == Initial Plan == - Sort (144) - +- Exchange (143) - +- HashAggregate (142) - +- Exchange (141) - +- HashAggregate (140) - +- Project (139) - +- ShuffledHashJoin Inner BuildRight (138) - :- Exchange (134) - : +- Project (133) - : +- ShuffledHashJoin Inner BuildRight (132) - : :- Exchange (128) - : : +- Project (127) - : : +- ShuffledHashJoin Inner BuildRight (126) - : : :- Exchange (122) - : : : +- Project (121) - : : : +- ShuffledHashJoin Inner BuildRight (120) - : : : :- Exchange (116) - : : : : +- Project (115) - : : : : +- ShuffledHashJoin Inner BuildLeft (114) - : : : : :- Exchange (110) - : : : : : +- Project (109) - : : : : : +- Filter (108) - : : : : : +- Scan parquet (107) - : : : : +- Exchange (113) - : : : : +- Filter (112) - : : : : +- Scan parquet (111) - : : : +- Exchange (119) - : : : +- Filter (118) - : : : +- Scan parquet (117) - : : +- Exchange (125) - : : +- Filter (124) - : : +- Scan parquet (123) - : +- Exchange (131) - : +- Filter (130) - : +- Scan parquet (129) - +- Exchange (137) - +- Filter (136) - +- Scan parquet (135) + Sort (154) + +- Exchange (153) + +- HashAggregate (152) + +- Exchange (151) + +- HashAggregate (150) + +- Project (149) + +- SortMergeJoin Inner (148) + :- Sort (143) + : +- Exchange (142) + : +- Project (141) + : +- SortMergeJoin Inner (140) + : :- Sort (135) + : : +- Exchange (134) + : : +- Project (133) + : : +- SortMergeJoin Inner (132) + : : :- Sort (127) + : : : +- Exchange (126) + : : : +- Project (125) + : : : +- SortMergeJoin Inner (124) + : : : :- Sort (119) + : : : : +- Exchange (118) + : : : : +- Project (117) + : : : : +- SortMergeJoin Inner (116) + : : : : :- Sort (111) + : : : : : +- Exchange (110) + : : : : : +- Project (109) + : : : : : +- Filter (108) + : : : : : +- Scan parquet (107) + : : : : +- Sort (115) + : : : : +- Exchange (114) + : : : : +- Filter (113) + : : : : +- Scan parquet (112) + : : : +- Sort (123) + : : : +- Exchange (122) + : : : +- Filter (121) + : : : +- Scan parquet (120) + : : +- Sort (131) + : : +- Exchange (130) + : : +- Filter (129) + : : +- Scan parquet (128) + : +- Sort (139) + : +- Exchange (138) + : +- Filter (137) + : +- Scan parquet (136) + +- Sort (147) + +- Exchange (146) + +- Filter (145) + +- Scan parquet (144) (1) Scan parquet @@ -575,173 +585,213 @@ Input [2]: [p_partkey#X, p_name#X] Input [1]: [p_partkey#X] Arguments: hashpartitioning(p_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(111) Scan parquet +(111) Sort +Input [1]: [p_partkey#X] +Arguments: [p_partkey#X ASC NULLS FIRST], false, 0 + +(112) Scan parquet Output [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(l_partkey), IsNotNull(l_suppkey), IsNotNull(l_orderkey)] ReadSchema: struct -(112) Filter +(113) Filter Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Condition : ((isnotnull(l_partkey#X) AND isnotnull(l_suppkey#X)) AND isnotnull(l_orderkey#X)) -(113) Exchange +(114) Exchange Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(114) ShuffledHashJoin +(115) Sort +Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] +Arguments: [l_partkey#X ASC NULLS FIRST], false, 0 + +(116) SortMergeJoin Left keys [1]: [p_partkey#X] Right keys [1]: [l_partkey#X] Join type: Inner Join condition: None -(115) Project +(117) Project Output [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Input [7]: [p_partkey#X, l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] -(116) Exchange +(118) Exchange Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] Arguments: hashpartitioning(l_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(117) Scan parquet +(119) Sort +Input [6]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X] +Arguments: [l_suppkey#X ASC NULLS FIRST], false, 0 + +(120) Scan parquet Output [2]: [s_suppkey#X, s_nationkey#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct -(118) Filter +(121) Filter Input [2]: [s_suppkey#X, s_nationkey#X] Condition : (isnotnull(s_suppkey#X) AND isnotnull(s_nationkey#X)) -(119) Exchange +(122) Exchange Input [2]: [s_suppkey#X, s_nationkey#X] Arguments: hashpartitioning(s_suppkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(120) ShuffledHashJoin +(123) Sort +Input [2]: [s_suppkey#X, s_nationkey#X] +Arguments: [s_suppkey#X ASC NULLS FIRST], false, 0 + +(124) SortMergeJoin Left keys [1]: [l_suppkey#X] Right keys [1]: [s_suppkey#X] Join type: Inner Join condition: None -(121) Project +(125) Project Output [7]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X] Input [8]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_suppkey#X, s_nationkey#X] -(122) Exchange +(126) Exchange Input [7]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X] Arguments: hashpartitioning(l_suppkey#X, l_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(123) Scan parquet +(127) Sort +Input [7]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X] +Arguments: [l_suppkey#X ASC NULLS FIRST, l_partkey#X ASC NULLS FIRST], false, 0 + +(128) Scan parquet Output [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(ps_suppkey), IsNotNull(ps_partkey)] ReadSchema: struct -(124) Filter +(129) Filter Input [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] Condition : (isnotnull(ps_suppkey#X) AND isnotnull(ps_partkey#X)) -(125) Exchange +(130) Exchange Input [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] Arguments: hashpartitioning(ps_suppkey#X, ps_partkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(126) ShuffledHashJoin +(131) Sort +Input [3]: [ps_partkey#X, ps_suppkey#X, ps_supplycost#X] +Arguments: [ps_suppkey#X ASC NULLS FIRST, ps_partkey#X ASC NULLS FIRST], false, 0 + +(132) SortMergeJoin Left keys [2]: [l_suppkey#X, l_partkey#X] Right keys [2]: [ps_suppkey#X, ps_partkey#X] Join type: Inner Join condition: None -(127) Project +(133) Project Output [6]: [l_orderkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X] Input [10]: [l_orderkey#X, l_partkey#X, l_suppkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_partkey#X, ps_suppkey#X, ps_supplycost#X] -(128) Exchange +(134) Exchange Input [6]: [l_orderkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X] Arguments: hashpartitioning(l_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(129) Scan parquet +(135) Sort +Input [6]: [l_orderkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X] +Arguments: [l_orderkey#X ASC NULLS FIRST], false, 0 + +(136) Scan parquet Output [2]: [o_orderkey#X, o_orderdate#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(o_orderkey)] ReadSchema: struct -(130) Filter +(137) Filter Input [2]: [o_orderkey#X, o_orderdate#X] Condition : isnotnull(o_orderkey#X) -(131) Exchange +(138) Exchange Input [2]: [o_orderkey#X, o_orderdate#X] Arguments: hashpartitioning(o_orderkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(132) ShuffledHashJoin +(139) Sort +Input [2]: [o_orderkey#X, o_orderdate#X] +Arguments: [o_orderkey#X ASC NULLS FIRST], false, 0 + +(140) SortMergeJoin Left keys [1]: [l_orderkey#X] Right keys [1]: [o_orderkey#X] Join type: Inner Join condition: None -(133) Project +(141) Project Output [6]: [l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderdate#X] Input [8]: [l_orderkey#X, l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderkey#X, o_orderdate#X] -(134) Exchange +(142) Exchange Input [6]: [l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderdate#X] Arguments: hashpartitioning(s_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(135) Scan parquet +(143) Sort +Input [6]: [l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderdate#X] +Arguments: [s_nationkey#X ASC NULLS FIRST], false, 0 + +(144) Scan parquet Output [2]: [n_nationkey#X, n_name#X] Batched: true Location: InMemoryFileIndex [*] PushedFilters: [IsNotNull(n_nationkey)] ReadSchema: struct -(136) Filter +(145) Filter Input [2]: [n_nationkey#X, n_name#X] Condition : isnotnull(n_nationkey#X) -(137) Exchange +(146) Exchange Input [2]: [n_nationkey#X, n_name#X] Arguments: hashpartitioning(n_nationkey#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(138) ShuffledHashJoin +(147) Sort +Input [2]: [n_nationkey#X, n_name#X] +Arguments: [n_nationkey#X ASC NULLS FIRST], false, 0 + +(148) SortMergeJoin Left keys [1]: [s_nationkey#X] Right keys [1]: [n_nationkey#X] Join type: Inner Join condition: None -(139) Project +(149) Project Output [3]: [n_name#X AS nation#X, year(o_orderdate#X) AS o_year#X, ((l_extendedprice#X * (1 - l_discount#X)) - (ps_supplycost#X * l_quantity#X)) AS amount#X] Input [8]: [l_quantity#X, l_extendedprice#X, l_discount#X, s_nationkey#X, ps_supplycost#X, o_orderdate#X, n_nationkey#X, n_name#X] -(140) HashAggregate +(150) HashAggregate Input [3]: [nation#X, o_year#X, amount#X] Keys [2]: [nation#X, o_year#X] Functions [1]: [partial_sum(amount#X)] Aggregate Attributes [2]: [sum#X, isEmpty#X] Results [4]: [nation#X, o_year#X, sum#X, isEmpty#X] -(141) Exchange +(151) Exchange Input [4]: [nation#X, o_year#X, sum#X, isEmpty#X] Arguments: hashpartitioning(nation#X, o_year#X, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(142) HashAggregate +(152) HashAggregate Input [4]: [nation#X, o_year#X, sum#X, isEmpty#X] Keys [2]: [nation#X, o_year#X] Functions [1]: [sum(amount#X)] Aggregate Attributes [1]: [sum(amount#X)#X] Results [3]: [nation#X, o_year#X, sum(amount#X)#X AS sum_profit#X] -(143) Exchange +(153) Exchange Input [3]: [nation#X, o_year#X, sum_profit#X] Arguments: rangepartitioning(nation#X ASC NULLS FIRST, o_year#X DESC NULLS LAST, 1), ENSURE_REQUIREMENTS, [plan_id=X] -(144) Sort +(154) Sort Input [3]: [nation#X, o_year#X, sum_profit#X] Arguments: [nation#X ASC NULLS FIRST, o_year#X DESC NULLS LAST], true, 0 -(145) AdaptiveSparkPlan +(155) AdaptiveSparkPlan Output [3]: [nation#X, o_year#X, sum_profit#X] Arguments: isFinalPlan=true \ No newline at end of file diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/FallbackSuite.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/FallbackSuite.scala index 27d191b9ee05..2b40ac54b2c6 100644 --- a/backends-velox/src/test/scala/org/apache/gluten/execution/FallbackSuite.scala +++ b/backends-velox/src/test/scala/org/apache/gluten/execution/FallbackSuite.scala @@ -23,6 +23,7 @@ import org.apache.spark.SparkConf import org.apache.spark.sql.execution.{ColumnarShuffleExchangeExec, SparkPlan} import org.apache.spark.sql.execution.adaptive.{AdaptiveSparkPlanHelper, AQEShuffleReadExec} import org.apache.spark.sql.execution.exchange.ShuffleExchangeExec +import org.apache.spark.sql.execution.joins.SortMergeJoinExec class FallbackSuite extends VeloxWholeStageTransformerSuite with AdaptiveSparkPlanHelper { protected val rootPath: String = getClass.getResource("/").getPath @@ -240,4 +241,26 @@ class FallbackSuite extends VeloxWholeStageTransformerSuite with AdaptiveSparkPl } } } + + test("fallback with smj") { + val sql = "SELECT /*+ SHUFFLE_MERGE(tmp1) */ * FROM tmp1 join tmp2 on tmp1.c1 = tmp2.c1" + withSQLConf( + GlutenConfig.COLUMNAR_FPRCE_SHUFFLED_HASH_JOIN_ENABLED.key -> "true", + GlutenConfig.COLUMNAR_SHUFFLED_HASH_JOIN_ENABLED.key -> "false") { + runQueryAndCompare(sql) { + df => + val plan = df.queryExecution.executedPlan + assert(collect(plan) { case smj: SortMergeJoinExec => smj }.size == 1) + } + } + withSQLConf( + GlutenConfig.COLUMNAR_FPRCE_SHUFFLED_HASH_JOIN_ENABLED.key -> "false", + GlutenConfig.COLUMNAR_SORTMERGEJOIN_ENABLED.key -> "false") { + runQueryAndCompare(sql) { + df => + val plan = df.queryExecution.executedPlan + assert(collect(plan) { case smj: SortMergeJoinExec => smj }.size == 1) + } + } + } } diff --git a/gluten-core/src/main/scala/org/apache/gluten/GlutenPlugin.scala b/gluten-core/src/main/scala/org/apache/gluten/GlutenPlugin.scala index 2860e3ced072..0f397c69263c 100644 --- a/gluten-core/src/main/scala/org/apache/gluten/GlutenPlugin.scala +++ b/gluten-core/src/main/scala/org/apache/gluten/GlutenPlugin.scala @@ -22,7 +22,7 @@ import org.apache.gluten.backendsapi.BackendsApiManager import org.apache.gluten.events.GlutenBuildInfoEvent import org.apache.gluten.exception.GlutenException import org.apache.gluten.expression.ExpressionMappings -import org.apache.gluten.extension.{ColumnarOverrides, OthersExtensionOverrides, QueryStagePrepOverrides, StrategyOverrides} +import org.apache.gluten.extension.{ColumnarOverrides, OthersExtensionOverrides, QueryStagePrepOverrides} import org.apache.gluten.test.TestStats import org.apache.gluten.utils.TaskListener @@ -312,7 +312,6 @@ private[gluten] object GlutenPlugin { val DEFAULT_INJECTORS: List[GlutenSparkExtensionsInjector] = List( QueryStagePrepOverrides, ColumnarOverrides, - StrategyOverrides, OthersExtensionOverrides ) } diff --git a/gluten-core/src/main/scala/org/apache/gluten/backendsapi/BackendSettingsApi.scala b/gluten-core/src/main/scala/org/apache/gluten/backendsapi/BackendSettingsApi.scala index 50292839b684..d159486373ac 100644 --- a/gluten-core/src/main/scala/org/apache/gluten/backendsapi/BackendSettingsApi.scala +++ b/gluten-core/src/main/scala/org/apache/gluten/backendsapi/BackendSettingsApi.scala @@ -86,7 +86,6 @@ trait BackendSettingsApi { * the result columns from the shuffle. */ def supportShuffleWithProject(outputPartitioning: Partitioning, child: SparkPlan): Boolean = false - def utilizeShuffledHashJoinHint(): Boolean = false def excludeScanExecFromCollapsedStage(): Boolean = false def rescaleDecimalArithmetic: Boolean = false diff --git a/shims/spark32/src/main/scala/org/apache/spark/sql/execution/JoinSelectionShim.scala b/gluten-core/src/main/scala/org/apache/gluten/execution/SortUtils.scala similarity index 53% rename from shims/spark32/src/main/scala/org/apache/spark/sql/execution/JoinSelectionShim.scala rename to gluten-core/src/main/scala/org/apache/gluten/execution/SortUtils.scala index 5849dd600b7d..2c0ad1b0a59a 100644 --- a/shims/spark32/src/main/scala/org/apache/spark/sql/execution/JoinSelectionShim.scala +++ b/gluten-core/src/main/scala/org/apache/gluten/execution/SortUtils.scala @@ -14,27 +14,20 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.spark.sql.execution +package org.apache.gluten.execution -import org.apache.spark.sql.catalyst.expressions.Expression -import org.apache.spark.sql.catalyst.planning.ExtractEquiJoinKeys -import org.apache.spark.sql.catalyst.plans.JoinType -import org.apache.spark.sql.catalyst.plans.logical.{Join, JoinHint, LogicalPlan} +import org.apache.gluten.extension.columnar.rewrite.RewrittenNodeWall -// https://issues.apache.org/jira/browse/SPARK-36745 -object JoinSelectionShim { - object ExtractEquiJoinKeysShim { - type ReturnType = - ( - JoinType, - Seq[Expression], - Seq[Expression], - Option[Expression], - LogicalPlan, - LogicalPlan, - JoinHint) - def unapply(join: Join): Option[ReturnType] = { - ExtractEquiJoinKeys.unapply(join) - } +import org.apache.spark.sql.execution.{ProjectExec, SortExec, SparkPlan} + +object SortUtils { + def dropPartialSort(plan: SparkPlan): SparkPlan = plan match { + case RewrittenNodeWall(p) => RewrittenNodeWall(dropPartialSort(p)) + case sort: SortExec if !sort.global => sort.child + // from pre/post project-pulling + case ProjectExec(_, SortExec(_, false, ProjectExec(_, p), _)) + if plan.outputSet == p.outputSet => + p + case _ => plan } } diff --git a/gluten-core/src/main/scala/org/apache/gluten/extension/StrategyOverrides.scala b/gluten-core/src/main/scala/org/apache/gluten/extension/StrategyOverrides.scala deleted file mode 100644 index f2f786259393..000000000000 --- a/gluten-core/src/main/scala/org/apache/gluten/extension/StrategyOverrides.scala +++ /dev/null @@ -1,213 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.gluten.extension - -import org.apache.gluten.{GlutenConfig, GlutenSparkExtensionsInjector} -import org.apache.gluten.backendsapi.BackendsApiManager -import org.apache.gluten.extension.columnar.TRANSFORM_UNSUPPORTED -import org.apache.gluten.extension.columnar.TransformHints.TAG -import org.apache.gluten.utils.LogicalPlanSelector - -import org.apache.spark.sql.{SparkSession, SparkSessionExtensions, Strategy} -import org.apache.spark.sql.catalyst.SQLConfHelper -import org.apache.spark.sql.catalyst.expressions.Expression -import org.apache.spark.sql.catalyst.optimizer.{BuildLeft, BuildRight, JoinSelectionHelper} -import org.apache.spark.sql.catalyst.plans._ -import org.apache.spark.sql.catalyst.plans.logical._ -import org.apache.spark.sql.execution.{joins, JoinSelectionShim, SparkPlan} -import org.apache.spark.sql.execution.adaptive.{BroadcastQueryStageExec, LogicalQueryStage} -import org.apache.spark.sql.execution.joins.BroadcastHashJoinExec - -object StrategyOverrides extends GlutenSparkExtensionsInjector { - override def inject(extensions: SparkSessionExtensions): Unit = { - extensions.injectPlannerStrategy(JoinSelectionOverrides) - } -} - -case class JoinSelectionOverrides(session: SparkSession) - extends Strategy - with JoinSelectionHelper - with SQLConfHelper { - - private def isBroadcastStage(plan: LogicalPlan): Boolean = plan match { - case LogicalQueryStage(_, _: BroadcastQueryStageExec) => true - case _ => false - } - - def extractEqualJoinKeyCondition( - joinType: JoinType, - leftKeys: Seq[Expression], - rightKeys: Seq[Expression], - condition: Option[Expression], - left: LogicalPlan, - right: LogicalPlan, - hint: JoinHint, - forceShuffledHashJoin: Boolean): Seq[SparkPlan] = { - if (isBroadcastStage(left) || isBroadcastStage(right)) { - val buildSide = if (isBroadcastStage(left)) BuildLeft else BuildRight - Seq( - BroadcastHashJoinExec( - leftKeys, - rightKeys, - joinType, - buildSide, - condition, - planLater(left), - planLater(right))) - } else { - // Generate BHJ here, avoid to do match in `JoinSelection` again. - val isHintEmpty = hint.leftHint.isEmpty && hint.rightHint.isEmpty - val buildSide = getBroadcastBuildSide(left, right, joinType, hint, !isHintEmpty, conf) - if (buildSide.isDefined) { - return Seq( - joins.BroadcastHashJoinExec( - leftKeys, - rightKeys, - joinType, - buildSide.get, - condition, - planLater(left), - planLater(right))) - } - - if ( - forceShuffledHashJoin && - !BackendsApiManager.getSparkPlanExecApiInstance.joinFallback( - joinType, - left.outputSet, - right.outputSet, - condition) && - !left.getTagValue(TAG).isDefined && - !right.getTagValue(TAG).isDefined - ) { - // Force use of ShuffledHashJoin in preference to SortMergeJoin. With no respect to - // conf setting "spark.sql.join.preferSortMergeJoin". - val (leftBuildable, rightBuildable) = - if (BackendsApiManager.getSettings.utilizeShuffledHashJoinHint()) { - // Currently, ClickHouse backend can not support AQE, so it needs to use join hint - // to decide the build side, after supporting AQE, will remove this. - val leftHintEnabled = hintToShuffleHashJoinLeft(hint) - val rightHintEnabled = hintToShuffleHashJoinRight(hint) - val leftHintMergeEnabled = hint.leftHint.exists(_.strategy.contains(SHUFFLE_MERGE)) - val rightHintMergeEnabled = hint.rightHint.exists(_.strategy.contains(SHUFFLE_MERGE)) - if (leftHintEnabled || rightHintEnabled) { - (leftHintEnabled, rightHintEnabled) - } else if (leftHintMergeEnabled || rightHintMergeEnabled) { - // hack: when set SHUFFLE_MERGE hint, it means that - // it don't use this side as the build side - (!leftHintMergeEnabled, !rightHintMergeEnabled) - } else { - ( - BackendsApiManager.getSettings.supportHashBuildJoinTypeOnLeft(joinType), - BackendsApiManager.getSettings.supportHashBuildJoinTypeOnRight(joinType)) - } - } else { - (canBuildShuffledHashJoinLeft(joinType), canBuildShuffledHashJoinRight(joinType)) - } - - if (!leftBuildable && !rightBuildable) { - return Nil - } - val buildSide = if (!leftBuildable) { - BuildRight - } else if (!rightBuildable) { - BuildLeft - } else { - getSmallerSide(left, right) - } - - return Option(buildSide) - .map { - buildSide => - Seq( - joins.ShuffledHashJoinExec( - leftKeys, - rightKeys, - joinType, - buildSide, - condition, - planLater(left), - planLater(right))) - } - .getOrElse(Nil) - } - Nil - } - } - - def existsMultiJoins(plan: LogicalPlan, count: Int = 0): Boolean = { - plan match { - case plan: Join => - if ((count + 1) >= GlutenConfig.getConf.logicalJoinOptimizationThrottle) return true - plan.children.exists(existsMultiJoins(_, count + 1)) - case plan: Project => - if ((count + 1) >= GlutenConfig.getConf.logicalJoinOptimizationThrottle) return true - plan.children.exists(existsMultiJoins(_, count + 1)) - case other => false - } - } - - def tagNotTransformable(plan: LogicalPlan, reason: String): LogicalPlan = { - plan.setTagValue(TAG, TRANSFORM_UNSUPPORTED(Some(reason))) - plan - } - - def tagNotTransformableRecursive(plan: LogicalPlan, reason: String): LogicalPlan = { - tagNotTransformable( - plan.withNewChildren(plan.children.map(tagNotTransformableRecursive(_, reason))), - reason) - } - - def existLeftOuterJoin(plan: LogicalPlan): Boolean = { - plan.collect { - case join: Join if join.joinType.sql.equals("LEFT OUTER") => - return true - }.size > 0 - } - - override def apply(plan: LogicalPlan): Seq[SparkPlan] = - LogicalPlanSelector.maybeNil(session, plan) { - // Ignore forceShuffledHashJoin if exist multi continuous joins - if ( - GlutenConfig.getConf.enableLogicalJoinOptimize && - existsMultiJoins(plan) && existLeftOuterJoin(plan) - ) { - tagNotTransformableRecursive(plan, "exist multi continuous joins") - } - plan match { - // If the build side of BHJ is already decided by AQE, we need to keep the build side. - case JoinSelectionShim.ExtractEquiJoinKeysShim( - joinType, - leftKeys, - rightKeys, - condition, - left, - right, - hint) => - extractEqualJoinKeyCondition( - joinType, - leftKeys, - rightKeys, - condition, - left, - right, - hint, - GlutenConfig.getConf.forceShuffledHashJoin) - case _ => Nil - } - } -} diff --git a/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/OffloadSingleNode.scala b/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/OffloadSingleNode.scala index 11db0bc1faf1..1f6f840b5552 100644 --- a/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/OffloadSingleNode.scala +++ b/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/OffloadSingleNode.scala @@ -27,6 +27,8 @@ import org.apache.gluten.utils.{LogLevelUtil, PlanUtil} import org.apache.spark.api.python.EvalPythonExecTransformer import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, Expression, InputFileBlockLength, InputFileBlockStart, InputFileName, NamedExpression} +import org.apache.spark.sql.catalyst.optimizer.{BuildLeft, BuildRight, BuildSide} +import org.apache.spark.sql.catalyst.plans.logical.Join import org.apache.spark.sql.execution._ import org.apache.spark.sql.execution.aggregate.{HashAggregateExec, ObjectHashAggregateExec, SortAggregateExec} import org.apache.spark.sql.execution.datasources.WriteFilesExec @@ -136,7 +138,7 @@ case class OffloadJoin() extends OffloadSingleNode with LogLevelUtil { plan.leftKeys, plan.rightKeys, plan.joinType, - TransformHints.getShuffleHashJoinBuildSide(plan), + OffloadJoin.getBuildSide(plan), plan.condition, left, right, @@ -187,6 +189,31 @@ case class OffloadJoin() extends OffloadSingleNode with LogLevelUtil { } } +object OffloadJoin { + + def getBuildSide(shj: ShuffledHashJoinExec): BuildSide = { + val leftBuildable = + BackendsApiManager.getSettings.supportHashBuildJoinTypeOnLeft(shj.joinType) + val rightBuildable = + BackendsApiManager.getSettings.supportHashBuildJoinTypeOnRight(shj.joinType) + if (!leftBuildable) { + BuildRight + } else if (!rightBuildable) { + BuildLeft + } else { + shj.logicalLink match { + case Some(join: Join) => + val leftSize = join.left.stats.sizeInBytes + val rightSize = join.right.stats.sizeInBytes + if (rightSize <= leftSize) BuildRight else BuildLeft + // Only the ShuffledHashJoinExec generated directly in some spark tests is not link + // logical plan, such as OuterJoinSuite. + case _ => shj.buildSide + } + } + } +} + case class OffloadProject() extends OffloadSingleNode with LogLevelUtil { private def containsInputFileRelatedExpr(expr: Expression): Boolean = { expr match { diff --git a/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/TransformHintRule.scala b/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/TransformHintRule.scala index aa7aab759ef8..7fb451057a2e 100644 --- a/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/TransformHintRule.scala +++ b/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/TransformHintRule.scala @@ -29,8 +29,6 @@ import org.apache.spark.api.python.EvalPythonExecTransformer import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.AttributeReference -import org.apache.spark.sql.catalyst.optimizer.{BuildLeft, BuildRight, BuildSide} -import org.apache.spark.sql.catalyst.plans.logical.Join import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.catalyst.trees.TreeNodeTag import org.apache.spark.sql.execution._ @@ -154,33 +152,6 @@ object TransformHints { tag(plan, newTag) } } - - def getShuffleHashJoinBuildSide(shj: ShuffledHashJoinExec): BuildSide = { - if (BackendsApiManager.getSettings.utilizeShuffledHashJoinHint()) { - shj.buildSide - } else { - val leftBuildable = BackendsApiManager.getSettings - .supportHashBuildJoinTypeOnLeft(shj.joinType) - val rightBuildable = BackendsApiManager.getSettings - .supportHashBuildJoinTypeOnRight(shj.joinType) - - if (!leftBuildable) { - BuildRight - } else if (!rightBuildable) { - BuildLeft - } else { - shj.logicalLink match { - case Some(join: Join) => - val leftSize = join.left.stats.sizeInBytes - val rightSize = join.right.stats.sizeInBytes - if (rightSize <= leftSize) BuildRight else BuildLeft - // Only the ShuffledHashJoinExec generated directly in some spark tests is not link - // logical plan, such as OuterJoinSuite. - case _ => shj.buildSide - } - } - } - } } case class FallbackOnANSIMode(session: SparkSession) extends Rule[SparkPlan] { @@ -205,6 +176,9 @@ case class FallbackMultiCodegens(session: SparkSession) extends Rule[SparkPlan] case plan: ShuffledHashJoinExec => if ((count + 1) >= optimizeLevel) return true plan.children.exists(existsMultiCodegens(_, count + 1)) + case plan: SortMergeJoinExec if GlutenConfig.getConf.forceShuffledHashJoin => + if ((count + 1) >= optimizeLevel) return true + plan.children.exists(existsMultiCodegens(_, count + 1)) case other => false } @@ -415,7 +389,7 @@ case class AddTransformHintRule() extends Rule[SparkPlan] { plan.leftKeys, plan.rightKeys, plan.joinType, - TransformHints.getShuffleHashJoinBuildSide(plan), + OffloadJoin.getBuildSide(plan), plan.condition, plan.left, plan.right, diff --git a/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/rewrite/RewriteJoin.scala b/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/rewrite/RewriteJoin.scala new file mode 100644 index 000000000000..e038f5af0a07 --- /dev/null +++ b/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/rewrite/RewriteJoin.scala @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.gluten.extension.columnar.rewrite + +import org.apache.gluten.GlutenConfig +import org.apache.gluten.execution.SortUtils + +import org.apache.spark.sql.catalyst.optimizer.{BuildLeft, BuildRight, BuildSide, JoinSelectionHelper} +import org.apache.spark.sql.catalyst.plans.JoinType +import org.apache.spark.sql.execution.SparkPlan +import org.apache.spark.sql.execution.joins.{ShuffledHashJoinExec, SortMergeJoinExec} + +/** + * If force ShuffledHashJoin, convert [[SortMergeJoinExec]] to [[ShuffledHashJoinExec]]. There is no + * need to select a smaller table as buildSide here, it will be reselected when offloading. + */ +object RewriteJoin extends RewriteSingleNode with JoinSelectionHelper { + + private def getBuildSide(joinType: JoinType): Option[BuildSide] = { + val leftBuildable = canBuildShuffledHashJoinLeft(joinType) + val rightBuildable = canBuildShuffledHashJoinRight(joinType) + if (rightBuildable) { + Some(BuildRight) + } else if (leftBuildable) { + Some(BuildLeft) + } else { + None + } + } + + override def rewrite(plan: SparkPlan): SparkPlan = plan match { + case smj: SortMergeJoinExec if GlutenConfig.getConf.forceShuffledHashJoin => + getBuildSide(smj.joinType) match { + case Some(buildSide) => + ShuffledHashJoinExec( + smj.leftKeys, + smj.rightKeys, + smj.joinType, + buildSide, + smj.condition, + SortUtils.dropPartialSort(smj.left), + SortUtils.dropPartialSort(smj.right), + smj.isSkewJoin + ) + case _ => plan + } + case _ => plan + } +} diff --git a/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/rewrite/RewriteSingleNode.scala b/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/rewrite/RewriteSingleNode.scala index 01f2e29fe62d..551cfd599abd 100644 --- a/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/rewrite/RewriteSingleNode.scala +++ b/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/rewrite/RewriteSingleNode.scala @@ -35,6 +35,6 @@ trait RewriteSingleNode { object RewriteSingleNode { def allRules(): Seq[RewriteSingleNode] = { - Seq(RewriteIn, RewriteMultiChildrenCount, PullOutPreProject, PullOutPostProject) + Seq(RewriteIn, RewriteMultiChildrenCount, RewriteJoin, PullOutPreProject, PullOutPostProject) } } diff --git a/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/rewrite/RewriteSparkPlanRulesManager.scala b/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/rewrite/RewriteSparkPlanRulesManager.scala index 8706e5618f6b..34fe34f3f3fa 100644 --- a/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/rewrite/RewriteSparkPlanRulesManager.scala +++ b/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/rewrite/RewriteSparkPlanRulesManager.scala @@ -67,12 +67,11 @@ class RewriteSparkPlanRulesManager private (rewriteRules: Seq[RewriteSingleNode] } } - private def getTransformHintBack( - origin: SparkPlan, - rewrittenPlan: SparkPlan): Option[TransformHint] = { - // The rewritten plan may contain more nodes than origin, here use the node name to get it back + private def getTransformHintBack(rewrittenPlan: SparkPlan): Option[TransformHint] = { + // The rewritten plan may contain more nodes than origin, for now it should only be + // `ProjectExec`. val target = rewrittenPlan.collect { - case p if p.nodeName == origin.nodeName => p + case p if !p.isInstanceOf[ProjectExec] && !p.isInstanceOf[RewrittenNodeWall] => p } assert(target.size == 1) TransformHints.getHintOption(target.head) @@ -113,7 +112,7 @@ class RewriteSparkPlanRulesManager private (rewriteRules: Seq[RewriteSingleNode] origin } else { addHint.apply(rewrittenPlan) - val hint = getTransformHintBack(origin, rewrittenPlan) + val hint = getTransformHintBack(rewrittenPlan) if (hint.isDefined) { // If the rewritten plan is still not transformable, return the original plan. TransformHints.tag(origin, hint.get) diff --git a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala index 5df53953e4cc..a17f72de3121 100644 --- a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala +++ b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala @@ -331,7 +331,6 @@ class VeloxTestSettings extends BackendTestSettings { .exclude("Shouldn't change broadcast join buildSide if user clearly specified") .exclude("Shouldn't bias towards build right if user didn't specify") .exclude("SPARK-23192: broadcast hint should be retained after using the cached data") - .exclude("broadcast hint isn't propagated after a join") .exclude("broadcast join where streamed side's output partitioning is HashPartitioning") .exclude("broadcast join where streamed side's output partitioning is PartitioningCollection") enableSuite[GlutenSQLQuerySuite] diff --git a/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/execution/joins/GlutenBroadcastJoinSuite.scala b/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/execution/joins/GlutenBroadcastJoinSuite.scala index c9ccc1afc75d..f418ec06645c 100644 --- a/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/execution/joins/GlutenBroadcastJoinSuite.scala +++ b/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/execution/joins/GlutenBroadcastJoinSuite.scala @@ -22,7 +22,6 @@ import org.apache.gluten.utils.{BackendTestUtils, SystemParameters} import org.apache.spark.sql.{GlutenTestsCommonTrait, SparkSession} import org.apache.spark.sql.catalyst.optimizer._ -import org.apache.spark.sql.execution.exchange.EnsureRequirements import org.apache.spark.sql.functions.broadcast import org.apache.spark.sql.internal.SQLConf @@ -41,8 +40,6 @@ class GlutenBroadcastJoinSuite extends BroadcastJoinSuite with GlutenTestsCommon * Create a new [[SparkSession]] running in local-cluster mode with unsafe and codegen enabled. */ - private val EnsureRequirements = new EnsureRequirements() - private val isVeloxBackend = BackendTestUtils.isVeloxBackendLoaded() // BroadcastHashJoinExecTransformer is not case class, can't call toString method, @@ -235,22 +232,6 @@ class GlutenBroadcastJoinSuite extends BroadcastJoinSuite with GlutenTestsCommon } } - testGluten("broadcast hint isn't propagated after a join") { - withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") { - val df1 = Seq((1, "4"), (2, "2")).toDF("key", "value") - val df2 = Seq((1, "1"), (2, "2")).toDF("key", "value") - val df3 = df1.join(broadcast(df2), Seq("key"), "inner").drop(df2("key")) - - val df4 = Seq((1, "5"), (2, "5")).toDF("key", "value") - val df5 = df4.join(df3, Seq("key"), "inner") - - val plan = EnsureRequirements.apply(df5.queryExecution.sparkPlan) - - assert(plan.collect { case p: BroadcastHashJoinExec => p }.size === 1) - assert(plan.collect { case p: ShuffledHashJoinExec => p }.size === 1) - } - } - private def assertJoinBuildSide(sqlStr: String, joinMethod: String, buildSide: BuildSide): Any = { val executedPlan = stripAQEPlan(sql(sqlStr).queryExecution.executedPlan) executedPlan match { diff --git a/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/extension/GlutenSessionExtensionSuite.scala b/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/extension/GlutenSessionExtensionSuite.scala index 00b4bf5821f1..4df9c63b3ef6 100644 --- a/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/extension/GlutenSessionExtensionSuite.scala +++ b/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/extension/GlutenSessionExtensionSuite.scala @@ -16,7 +16,7 @@ */ package org.apache.spark.sql.extension -import org.apache.gluten.extension.{ColumnarOverrideRules, JoinSelectionOverrides} +import org.apache.gluten.extension.ColumnarOverrideRules import org.apache.spark.SparkConf import org.apache.spark.sql._ @@ -31,7 +31,6 @@ class GlutenSessionExtensionSuite extends GlutenSQLTestsTrait { testGluten("test gluten extensions") { assert(spark.sessionState.columnarRules.contains(ColumnarOverrideRules(spark))) - assert(spark.sessionState.planner.strategies.contains(JoinSelectionOverrides(spark))) assert(spark.sessionState.planner.strategies.contains(MySparkStrategy(spark))) assert(spark.sessionState.analyzer.extendedResolutionRules.contains(MyRule(spark))) diff --git a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala index 8418cba237e3..ae3e7c7b8e9d 100644 --- a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala +++ b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala @@ -865,7 +865,6 @@ class VeloxTestSettings extends BackendTestSettings { .exclude("Shouldn't change broadcast join buildSide if user clearly specified") .exclude("Shouldn't bias towards build right if user didn't specify") .exclude("SPARK-23192: broadcast hint should be retained after using the cached data") - .exclude("broadcast hint isn't propagated after a join") .exclude("broadcast join where streamed side's output partitioning is HashPartitioning") enableSuite[GlutenExistenceJoinSuite] diff --git a/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/execution/GlutenReplaceHashWithSortAggSuite.scala b/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/execution/GlutenReplaceHashWithSortAggSuite.scala index 026e2dde0055..92e6fee97ea9 100644 --- a/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/execution/GlutenReplaceHashWithSortAggSuite.scala +++ b/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/execution/GlutenReplaceHashWithSortAggSuite.scala @@ -85,11 +85,11 @@ class GlutenReplaceHashWithSortAggSuite withTempView("t1", "t2") { spark.range(100).selectExpr("id as key").createOrReplaceTempView("t1") spark.range(50).selectExpr("id as key").createOrReplaceTempView("t2") - Seq("COUNT", "COLLECT_LIST").foreach { - aggExpr => + Seq(("COUNT", 0, 1, 2, 0), ("COLLECT_LIST", 2, 0, 2, 0)).foreach { + aggExprInfo => val query = s""" - |SELECT key, $aggExpr(key) + |SELECT key, ${aggExprInfo._1}(key) |FROM |( | SELECT /*+ SHUFFLE_MERGE(t1) */ t1.key AS key @@ -102,7 +102,7 @@ class GlutenReplaceHashWithSortAggSuite if (BackendsApiManager.getSettings.mergeTwoPhasesHashBaseAggregateIfNeed()) { checkAggs(query, 1, 0, 1, 0) } else { - checkAggs(query, 2, 0, 2, 0) + checkAggs(query, aggExprInfo._2, aggExprInfo._3, aggExprInfo._4, aggExprInfo._5) } } } diff --git a/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/extension/GlutenSessionExtensionSuite.scala b/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/extension/GlutenSessionExtensionSuite.scala index 00b4bf5821f1..4df9c63b3ef6 100644 --- a/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/extension/GlutenSessionExtensionSuite.scala +++ b/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/extension/GlutenSessionExtensionSuite.scala @@ -16,7 +16,7 @@ */ package org.apache.spark.sql.extension -import org.apache.gluten.extension.{ColumnarOverrideRules, JoinSelectionOverrides} +import org.apache.gluten.extension.ColumnarOverrideRules import org.apache.spark.SparkConf import org.apache.spark.sql._ @@ -31,7 +31,6 @@ class GlutenSessionExtensionSuite extends GlutenSQLTestsTrait { testGluten("test gluten extensions") { assert(spark.sessionState.columnarRules.contains(ColumnarOverrideRules(spark))) - assert(spark.sessionState.planner.strategies.contains(JoinSelectionOverrides(spark))) assert(spark.sessionState.planner.strategies.contains(MySparkStrategy(spark))) assert(spark.sessionState.analyzer.extendedResolutionRules.contains(MyRule(spark))) diff --git a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala index 91127c4ba9bb..0da19922ffda 100644 --- a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala +++ b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala @@ -857,7 +857,6 @@ class VeloxTestSettings extends BackendTestSettings { .exclude("Shouldn't change broadcast join buildSide if user clearly specified") .exclude("Shouldn't bias towards build right if user didn't specify") .exclude("SPARK-23192: broadcast hint should be retained after using the cached data") - .exclude("broadcast hint isn't propagated after a join") .exclude("broadcast join where streamed side's output partitioning is HashPartitioning") .exclude("broadcast join where streamed side's output partitioning is PartitioningCollection") @@ -1119,9 +1118,6 @@ class VeloxTestSettings extends BackendTestSettings { enableSuite[GlutenJoinSuite] // exclude as it check spark plan .exclude("SPARK-36794: Ignore duplicated key when building relation for semi/anti hash join") - // exclude as it check for SMJ node - .exclude( - "SPARK-43113: Full outer join with duplicate stream-side references in condition (SMJ)") enableSuite[GlutenMathFunctionsSuite] enableSuite[GlutenMetadataCacheSuite] .exclude("SPARK-16336,SPARK-27961 Suggest fixing FileNotFoundException") diff --git a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenJoinSuite.scala b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenJoinSuite.scala index 4ac8bd3ea8bf..8a5a5923f729 100644 --- a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenJoinSuite.scala +++ b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenJoinSuite.scala @@ -16,9 +16,6 @@ */ package org.apache.spark.sql -import org.apache.spark.sql.execution.SparkPlan -import org.apache.spark.sql.execution.joins.ShuffledHashJoinExec - class GlutenJoinSuite extends JoinSuite with GlutenSQLTestsTrait { override def testNameBlackList: Seq[String] = Seq( @@ -55,14 +52,4 @@ class GlutenJoinSuite extends JoinSuite with GlutenSQLTestsTrait { |""".stripMargin checkAnswer(spark.sql(sql), Seq(Row(0, 1), Row(1, 2), Row(2, 3))) } - - testGluten( - "SPARK-43113: Full outer join with duplicate stream-side" + - " references in condition (SHJ)") { - def check(plan: SparkPlan): Unit = { - assert(collect(plan) { case _: ShuffledHashJoinExec => true }.size === 1) - } - - dupStreamSideColTest("MERGE", check) - } } diff --git a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/execution/GlutenReplaceHashWithSortAggSuite.scala b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/execution/GlutenReplaceHashWithSortAggSuite.scala index 8d795bbffea0..332c21418a9b 100644 --- a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/execution/GlutenReplaceHashWithSortAggSuite.scala +++ b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/execution/GlutenReplaceHashWithSortAggSuite.scala @@ -84,11 +84,11 @@ class GlutenReplaceHashWithSortAggSuite withTempView("t1", "t2") { spark.range(100).selectExpr("id as key").createOrReplaceTempView("t1") spark.range(50).selectExpr("id as key").createOrReplaceTempView("t2") - Seq("COUNT", "COLLECT_LIST").foreach { - aggExpr => + Seq(("COUNT", 0, 1, 2, 0), ("COLLECT_LIST", 2, 0, 2, 0)).foreach { + aggExprInfo => val query = s""" - |SELECT key, $aggExpr(key) + |SELECT key, ${aggExprInfo._1}(key) |FROM |( | SELECT /*+ SHUFFLE_MERGE(t1) */ t1.key AS key @@ -98,7 +98,7 @@ class GlutenReplaceHashWithSortAggSuite |) |GROUP BY key """.stripMargin - checkAggs(query, 2, 0, 2, 0) + checkAggs(query, aggExprInfo._2, aggExprInfo._3, aggExprInfo._4, aggExprInfo._5) } } } diff --git a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/extension/GlutenSessionExtensionSuite.scala b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/extension/GlutenSessionExtensionSuite.scala index 00b4bf5821f1..4df9c63b3ef6 100644 --- a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/extension/GlutenSessionExtensionSuite.scala +++ b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/extension/GlutenSessionExtensionSuite.scala @@ -16,7 +16,7 @@ */ package org.apache.spark.sql.extension -import org.apache.gluten.extension.{ColumnarOverrideRules, JoinSelectionOverrides} +import org.apache.gluten.extension.ColumnarOverrideRules import org.apache.spark.SparkConf import org.apache.spark.sql._ @@ -31,7 +31,6 @@ class GlutenSessionExtensionSuite extends GlutenSQLTestsTrait { testGluten("test gluten extensions") { assert(spark.sessionState.columnarRules.contains(ColumnarOverrideRules(spark))) - assert(spark.sessionState.planner.strategies.contains(JoinSelectionOverrides(spark))) assert(spark.sessionState.planner.strategies.contains(MySparkStrategy(spark))) assert(spark.sessionState.analyzer.extendedResolutionRules.contains(MyRule(spark))) diff --git a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala index 6162b5651980..e54aca34ec75 100644 --- a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala +++ b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala @@ -866,7 +866,6 @@ class VeloxTestSettings extends BackendTestSettings { .exclude("Shouldn't change broadcast join buildSide if user clearly specified") .exclude("Shouldn't bias towards build right if user didn't specify") .exclude("SPARK-23192: broadcast hint should be retained after using the cached data") - .exclude("broadcast hint isn't propagated after a join") .exclude("broadcast join where streamed side's output partitioning is HashPartitioning") enableSuite[GlutenExistenceJoinSuite] @@ -1136,9 +1135,6 @@ class VeloxTestSettings extends BackendTestSettings { enableSuite[GlutenJoinSuite] // exclude as it check spark plan .exclude("SPARK-36794: Ignore duplicated key when building relation for semi/anti hash join") - // exclude as it check for SMJ node - .exclude( - "SPARK-43113: Full outer join with duplicate stream-side references in condition (SMJ)") enableSuite[GlutenMathFunctionsSuite] enableSuite[GlutenMetadataCacheSuite] .exclude("SPARK-16336,SPARK-27961 Suggest fixing FileNotFoundException") diff --git a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenJoinSuite.scala b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenJoinSuite.scala index 09718fb1a439..5ef4056201ed 100644 --- a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenJoinSuite.scala +++ b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenJoinSuite.scala @@ -16,9 +16,6 @@ */ package org.apache.spark.sql -import org.apache.spark.sql.execution.SparkPlan -import org.apache.spark.sql.execution.joins.ShuffledHashJoinExec - class GlutenJoinSuite extends JoinSuite with GlutenSQLTestsTrait { override def testNameBlackList: Seq[String] = Seq( @@ -57,14 +54,4 @@ class GlutenJoinSuite extends JoinSuite with GlutenSQLTestsTrait { |""".stripMargin checkAnswer(spark.sql(sql), Seq(Row(0, 1), Row(1, 2), Row(2, 3))) } - - testGluten( - "SPARK-43113: Full outer join with duplicate stream-side" + - " references in condition (SHJ)") { - def check(plan: SparkPlan): Unit = { - assert(collect(plan) { case _: ShuffledHashJoinExec => true }.size === 1) - } - - dupStreamSideColTest("MERGE", check) - } } diff --git a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/execution/GlutenReplaceHashWithSortAggSuite.scala b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/execution/GlutenReplaceHashWithSortAggSuite.scala index 8d795bbffea0..332c21418a9b 100644 --- a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/execution/GlutenReplaceHashWithSortAggSuite.scala +++ b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/execution/GlutenReplaceHashWithSortAggSuite.scala @@ -84,11 +84,11 @@ class GlutenReplaceHashWithSortAggSuite withTempView("t1", "t2") { spark.range(100).selectExpr("id as key").createOrReplaceTempView("t1") spark.range(50).selectExpr("id as key").createOrReplaceTempView("t2") - Seq("COUNT", "COLLECT_LIST").foreach { - aggExpr => + Seq(("COUNT", 0, 1, 2, 0), ("COLLECT_LIST", 2, 0, 2, 0)).foreach { + aggExprInfo => val query = s""" - |SELECT key, $aggExpr(key) + |SELECT key, ${aggExprInfo._1}(key) |FROM |( | SELECT /*+ SHUFFLE_MERGE(t1) */ t1.key AS key @@ -98,7 +98,7 @@ class GlutenReplaceHashWithSortAggSuite |) |GROUP BY key """.stripMargin - checkAggs(query, 2, 0, 2, 0) + checkAggs(query, aggExprInfo._2, aggExprInfo._3, aggExprInfo._4, aggExprInfo._5) } } } diff --git a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/extension/GlutenSessionExtensionSuite.scala b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/extension/GlutenSessionExtensionSuite.scala index 00b4bf5821f1..4df9c63b3ef6 100644 --- a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/extension/GlutenSessionExtensionSuite.scala +++ b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/extension/GlutenSessionExtensionSuite.scala @@ -16,7 +16,7 @@ */ package org.apache.spark.sql.extension -import org.apache.gluten.extension.{ColumnarOverrideRules, JoinSelectionOverrides} +import org.apache.gluten.extension.ColumnarOverrideRules import org.apache.spark.SparkConf import org.apache.spark.sql._ @@ -31,7 +31,6 @@ class GlutenSessionExtensionSuite extends GlutenSQLTestsTrait { testGluten("test gluten extensions") { assert(spark.sessionState.columnarRules.contains(ColumnarOverrideRules(spark))) - assert(spark.sessionState.planner.strategies.contains(JoinSelectionOverrides(spark))) assert(spark.sessionState.planner.strategies.contains(MySparkStrategy(spark))) assert(spark.sessionState.analyzer.extendedResolutionRules.contains(MyRule(spark))) diff --git a/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala b/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala index ec80ba86a7b9..4ef96bec27eb 100644 --- a/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala +++ b/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala @@ -153,9 +153,6 @@ class GlutenConfig(conf: SQLConf) extends Logging { def logicalJoinOptimizationThrottle: Integer = conf.getConf(COLUMNAR_LOGICAL_JOIN_OPTIMIZATION_THROTTLE) - def enableLogicalJoinOptimize: Boolean = - conf.getConf(COLUMNAR_LOGICAL_JOIN_OPTIMIZATION_ENABLED) - def enableScanOnly: Boolean = conf.getConf(COLUMNAR_SCAN_ONLY_ENABLED) def tmpFile: Option[String] = conf.getConf(COLUMNAR_TEMP_DIR) @@ -1007,13 +1004,6 @@ object GlutenConfig { .intConf .createWithDefault(12) - val COLUMNAR_LOGICAL_JOIN_OPTIMIZATION_ENABLED = - buildConf("spark.gluten.sql.columnar.logicalJoinOptimizeEnable") - .internal() - .doc("Enable or disable columnar logicalJoinOptimize.") - .booleanConf - .createWithDefault(false) - val COLUMNAR_SCAN_ONLY_ENABLED = buildConf("spark.gluten.sql.columnar.scanOnly") .internal() diff --git a/shims/spark33/src/main/scala/org/apache/spark/sql/execution/JoinSelectionShim.scala b/shims/spark33/src/main/scala/org/apache/spark/sql/execution/JoinSelectionShim.scala deleted file mode 100644 index 20b9dea333a5..000000000000 --- a/shims/spark33/src/main/scala/org/apache/spark/sql/execution/JoinSelectionShim.scala +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.sql.execution - -import org.apache.spark.sql.catalyst.expressions.Expression -import org.apache.spark.sql.catalyst.planning.ExtractEquiJoinKeys -import org.apache.spark.sql.catalyst.plans.JoinType -import org.apache.spark.sql.catalyst.plans.logical.{Join, JoinHint, LogicalPlan} - -// https://issues.apache.org/jira/browse/SPARK-36745 -object JoinSelectionShim { - object ExtractEquiJoinKeysShim { - type ReturnType = - ( - JoinType, - Seq[Expression], - Seq[Expression], - Option[Expression], - LogicalPlan, - LogicalPlan, - JoinHint) - def unapply(join: Join): Option[ReturnType] = { - ExtractEquiJoinKeys.unapply(join).map { - case ( - joinType, - leftKeys, - rightKeys, - otherPredicates, - predicatesOfJoinKeys, - left, - right, - hint) => - (joinType, leftKeys, rightKeys, otherPredicates, left, right, hint) - } - } - } -} diff --git a/shims/spark34/src/main/scala/org/apache/spark/sql/execution/JoinSelectionShim.scala b/shims/spark34/src/main/scala/org/apache/spark/sql/execution/JoinSelectionShim.scala deleted file mode 100644 index 20b9dea333a5..000000000000 --- a/shims/spark34/src/main/scala/org/apache/spark/sql/execution/JoinSelectionShim.scala +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.sql.execution - -import org.apache.spark.sql.catalyst.expressions.Expression -import org.apache.spark.sql.catalyst.planning.ExtractEquiJoinKeys -import org.apache.spark.sql.catalyst.plans.JoinType -import org.apache.spark.sql.catalyst.plans.logical.{Join, JoinHint, LogicalPlan} - -// https://issues.apache.org/jira/browse/SPARK-36745 -object JoinSelectionShim { - object ExtractEquiJoinKeysShim { - type ReturnType = - ( - JoinType, - Seq[Expression], - Seq[Expression], - Option[Expression], - LogicalPlan, - LogicalPlan, - JoinHint) - def unapply(join: Join): Option[ReturnType] = { - ExtractEquiJoinKeys.unapply(join).map { - case ( - joinType, - leftKeys, - rightKeys, - otherPredicates, - predicatesOfJoinKeys, - left, - right, - hint) => - (joinType, leftKeys, rightKeys, otherPredicates, left, right, hint) - } - } - } -} diff --git a/shims/spark35/src/main/scala/org/apache/spark/sql/execution/JoinSelectionShim.scala b/shims/spark35/src/main/scala/org/apache/spark/sql/execution/JoinSelectionShim.scala deleted file mode 100644 index 20b9dea333a5..000000000000 --- a/shims/spark35/src/main/scala/org/apache/spark/sql/execution/JoinSelectionShim.scala +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.sql.execution - -import org.apache.spark.sql.catalyst.expressions.Expression -import org.apache.spark.sql.catalyst.planning.ExtractEquiJoinKeys -import org.apache.spark.sql.catalyst.plans.JoinType -import org.apache.spark.sql.catalyst.plans.logical.{Join, JoinHint, LogicalPlan} - -// https://issues.apache.org/jira/browse/SPARK-36745 -object JoinSelectionShim { - object ExtractEquiJoinKeysShim { - type ReturnType = - ( - JoinType, - Seq[Expression], - Seq[Expression], - Option[Expression], - LogicalPlan, - LogicalPlan, - JoinHint) - def unapply(join: Join): Option[ReturnType] = { - ExtractEquiJoinKeys.unapply(join).map { - case ( - joinType, - leftKeys, - rightKeys, - otherPredicates, - predicatesOfJoinKeys, - left, - right, - hint) => - (joinType, leftKeys, rightKeys, otherPredicates, left, right, hint) - } - } - } -}