From 897fae6ed4d5d191323e27f88e1ad124e0d34d4a Mon Sep 17 00:00:00 2001 From: Edmondo Porcu Date: Thu, 3 Oct 2024 07:28:15 -0400 Subject: [PATCH 01/14] Using tpch script from datafusion-benchmarks --- .github/workflows/rust.yml | 34 ++- .gitignore | 1 + Cargo.toml | 6 +- scripts/gen-test-data.sh | 6 + src/planner.rs | 55 +++-- testdata/expected-plans/q1.txt | 40 ++-- testdata/expected-plans/q10.txt | 148 +++++++------ testdata/expected-plans/q11.txt | 299 +++++++++++++-------------- testdata/expected-plans/q12.txt | 64 +++--- testdata/expected-plans/q13.txt | 93 ++++----- testdata/expected-plans/q14.txt | 65 +++--- testdata/expected-plans/q16.txt | 120 +++++------ testdata/expected-plans/q17.txt | 133 ++++++------ testdata/expected-plans/q18.txt | 122 ++++++----- testdata/expected-plans/q19.txt | 97 ++++----- testdata/expected-plans/q2.txt | 353 ++++++++++++++++---------------- testdata/expected-plans/q20.txt | 204 +++++++++--------- testdata/expected-plans/q21.txt | 237 +++++++++++---------- testdata/expected-plans/q22.txt | 155 +++++++------- testdata/expected-plans/q3.txt | 114 +++++------ testdata/expected-plans/q4.txt | 85 ++++---- testdata/expected-plans/q5.txt | 223 ++++++++++---------- testdata/expected-plans/q6.txt | 28 +-- testdata/expected-plans/q7.txt | 267 ++++++++++++------------ testdata/expected-plans/q8.txt | 280 ++++++++++++------------- testdata/expected-plans/q9.txt | 192 ++++++++--------- tpch/requirements.txt | 4 + tpch/tpchgen.py | 264 ++++++++++++++++++++++++ 28 files changed, 1938 insertions(+), 1751 deletions(-) create mode 100755 scripts/gen-test-data.sh create mode 100644 tpch/requirements.txt create mode 100644 tpch/tpchgen.py diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 66d4aeb..7706511 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -2,24 +2,36 @@ name: Rust on: push: - branches: [ "main" ] pull_request: - branches: [ "main" ] env: CARGO_TERM_COLOR: always + PYTHON_VERSION: 3.9 
+ TPCH_SAMPLING_RATE: "1" # (1/100) + TPCH_TEST_PARTITIONS: "2" + TPCH_DATA_PATH: "data" jobs: build: - runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 - - name: Install protobuf compiler - shell: bash - run: sudo apt-get install protobuf-compiler - - name: Build Rust code - run: cargo build --verbose - - name: Run tests - run: cargo test --verbose + - uses: actions/checkout@v3 + - name: Install protobuf compiler + shell: bash + run: sudo apt-get install protobuf-compiler + - name: Build Rust code + run: cargo build --verbose + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: ${{ env.PYTHON_VERSION }} + - name: Install test dependencies + run: | + python -m pip install --upgrade pip + pip install -r tpch/requirements.txt + - name: Generate test data + run: | + ./scripts/gen-test-data.sh + - name: Run tests + run: cargo test --verbose diff --git a/.gitignore b/.gitignore index 0c56375..1a7e859 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,4 @@ venv *.so *.log results-sf* +data diff --git a/Cargo.toml b/Cargo.toml index 081520f..bf7c661 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -45,6 +45,10 @@ uuid = "1.2" rustc_version = "0.4.0" tonic-build = { version = "0.8", default-features = false, features = ["transport", "prost"] } +[dev-dependencies] +anyhow = "1.0.89" +pretty_assertions = "1.4.0" + [lib] name = "datafusion_ray" crate-type = ["cdylib", "rlib"] @@ -54,4 +58,4 @@ name = "datafusion_ray._datafusion_ray_internal" [profile.release] codegen-units = 1 -lto = true \ No newline at end of file +lto = true diff --git a/scripts/gen-test-data.sh b/scripts/gen-test-data.sh new file mode 100755 index 0000000..23772f2 --- /dev/null +++ b/scripts/gen-test-data.sh @@ -0,0 +1,6 @@ +#!/bin/bash + +set -e +mkdir -p data +python -m tpch.tpchgen generate --scale-factor "$TPCH_SAMPLING_RATE" --partitions "$TPCH_TEST_PARTITIONS" +python -m tpch.tpchgen convert --partitions "$TPCH_TEST_PARTITIONS" diff --git a/src/planner.rs 
b/src/planner.rs index b940c9d..5c58663 100644 --- a/src/planner.rs +++ b/src/planner.rs @@ -264,125 +264,124 @@ mod test { use super::*; use datafusion::physical_plan::displayable; use datafusion::prelude::{ParquetReadOptions, SessionConfig, SessionContext}; - use std::fs; + use pretty_assertions::assert_eq; use std::path::Path; + use std::{env, fs}; + type TestResult<T> = std::result::Result<T, anyhow::Error>; #[tokio::test] - async fn test_q1() -> Result<()> { + async fn test_q1() -> TestResult<()> { do_test(1).await } #[tokio::test] - async fn test_q2() -> Result<()> { + async fn test_q2() -> TestResult<()> { do_test(2).await } #[tokio::test] - async fn test_q3() -> Result<()> { + async fn test_q3() -> TestResult<()> { do_test(3).await } #[tokio::test] - async fn test_q4() -> Result<()> { + async fn test_q4() -> TestResult<()> { do_test(4).await } #[tokio::test] - async fn test_q5() -> Result<()> { + async fn test_q5() -> TestResult<()> { do_test(5).await } #[tokio::test] - async fn test_q6() -> Result<()> { + async fn test_q6() -> TestResult<()> { do_test(6).await } #[tokio::test] - async fn test_q7() -> Result<()> { + async fn test_q7() -> TestResult<()> { do_test(7).await } #[tokio::test] - async fn test_q8() -> Result<()> { + async fn test_q8() -> TestResult<()> { do_test(8).await } #[tokio::test] - async fn test_q9() -> Result<()> { + async fn test_q9() -> TestResult<()> { do_test(9).await } #[tokio::test] - async fn test_q10() -> Result<()> { + async fn test_q10() -> TestResult<()> { do_test(10).await } #[tokio::test] - async fn test_q11() -> Result<()> { + async fn test_q11() -> TestResult<()> { do_test(11).await } #[tokio::test] - async fn test_q12() -> Result<()> { + async fn test_q12() -> TestResult<()> { do_test(12).await } #[tokio::test] - async fn test_q13() -> Result<()> { + async fn test_q13() -> TestResult<()> { do_test(13).await } #[tokio::test] - async fn test_q14() -> Result<()> { + async fn test_q14() -> TestResult<()> { do_test(14).await } #[ignore] 
#[tokio::test] - async fn test_q15() -> Result<()> { + async fn test_q15() -> TestResult<()> { do_test(15).await } #[tokio::test] - async fn test_q16() -> Result<()> { + async fn test_q16() -> TestResult<()> { do_test(16).await } #[tokio::test] - async fn test_q17() -> Result<()> { + async fn test_q17() -> TestResult<()> { do_test(17).await } #[tokio::test] - async fn test_q18() -> Result<()> { + async fn test_q18() -> TestResult<()> { do_test(18).await } #[tokio::test] - async fn test_q19() -> Result<()> { + async fn test_q19() -> TestResult<()> { do_test(19).await } #[tokio::test] - async fn test_q20() -> Result<()> { + async fn test_q20() -> TestResult<()> { do_test(20).await } #[tokio::test] - async fn test_q21() -> Result<()> { + async fn test_q21() -> TestResult<()> { do_test(21).await } #[tokio::test] - async fn test_q22() -> Result<()> { + async fn test_q22() -> TestResult<()> { do_test(22).await } - async fn do_test(n: u8) -> Result<()> { - let data_path = "/mnt/bigdata/tpch/sf10-parquet"; - if !Path::new(&data_path).exists() { - return Ok(()); - } + async fn do_test(n: u8) -> TestResult<()> { + let data_path = env::var("TPCH_DATA_PATH")?; let file = format!("testdata/queries/q{n}.sql"); let sql = fs::read_to_string(&file)?; let config = SessionConfig::new().with_target_partitions(4); diff --git a/testdata/expected-plans/q1.txt b/testdata/expected-plans/q1.txt index 2396d76..73cce46 100644 --- a/testdata/expected-plans/q1.txt +++ b/testdata/expected-plans/q1.txt @@ -2,43 +2,43 @@ DataFusion Logical Plan ======================= Sort: lineitem.l_returnflag ASC NULLS LAST, lineitem.l_linestatus ASC NULLS LAST - Projection: lineitem.l_returnflag, lineitem.l_linestatus, SUM(lineitem.l_quantity) AS sum_qty, SUM(lineitem.l_extendedprice) AS sum_base_price, SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS sum_disc_price, SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax) AS sum_charge, 
AVG(lineitem.l_quantity) AS avg_qty, AVG(lineitem.l_extendedprice) AS avg_price, AVG(lineitem.l_discount) AS avg_disc, COUNT(UInt8(1)) AS count_order - Aggregate: groupBy=[[lineitem.l_returnflag, lineitem.l_linestatus]], aggr=[[SUM(lineitem.l_quantity), SUM(lineitem.l_extendedprice), SUM(CAST(lineitem.l_extendedprice AS Decimal128(35, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(35, 4))CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(35, 4))Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2))CAST(lineitem.l_discount AS Decimal128(23, 2))lineitem.l_discountDecimal128(Some(100),23,2)CAST(lineitem.l_extendedprice AS Decimal128(35, 4))lineitem.l_extendedprice AS lineitem.l_extendedprice * Decimal128(Some(100),23,2) - lineitem.l_discount) AS SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount), SUM(CAST(CAST(lineitem.l_extendedprice AS Decimal128(35, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(35, 4))CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(35, 4))Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2))CAST(lineitem.l_discount AS Decimal128(23, 2))lineitem.l_discountDecimal128(Some(100),23,2)CAST(lineitem.l_extendedprice AS Decimal128(35, 4))lineitem.l_extendedprice AS lineitem.l_extendedprice * Decimal128(Some(100),23,2) - lineitem.l_discount AS Decimal128(38, 6)) * CAST(Decimal128(Some(100),23,2) + CAST(lineitem.l_tax AS Decimal128(23, 2)) AS Decimal128(38, 6))) AS SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax), AVG(lineitem.l_quantity), AVG(lineitem.l_extendedprice), AVG(lineitem.l_discount), COUNT(UInt8(1))]] - Projection: CAST(lineitem.l_extendedprice AS Decimal128(35, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS 
Decimal128(35, 4)) AS CAST(lineitem.l_extendedprice AS Decimal128(35, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(35, 4))CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(35, 4))Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2))CAST(lineitem.l_discount AS Decimal128(23, 2))lineitem.l_discountDecimal128(Some(100),23,2)CAST(lineitem.l_extendedprice AS Decimal128(35, 4))lineitem.l_extendedprice, lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_tax, lineitem.l_returnflag, lineitem.l_linestatus - Filter: lineitem.l_shipdate <= Date32("10493") - TableScan: lineitem projection=[l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate], partial_filters=[lineitem.l_shipdate <= Date32("10493")] + Projection: lineitem.l_returnflag, lineitem.l_linestatus, sum(lineitem.l_quantity) AS sum_qty, sum(lineitem.l_extendedprice) AS sum_base_price, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS sum_disc_price, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax) AS sum_charge, avg(lineitem.l_quantity) AS avg_qty, avg(lineitem.l_extendedprice) AS avg_price, avg(lineitem.l_discount) AS avg_disc, count(*) AS count_order + Aggregate: groupBy=[[lineitem.l_returnflag, lineitem.l_linestatus]], aggr=[[sum(lineitem.l_quantity), sum(lineitem.l_extendedprice), sum(__common_expr_1) AS sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount), sum(__common_expr_1 * (Decimal128(Some(1),20,0) + lineitem.l_tax)) AS sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax), avg(lineitem.l_quantity), avg(lineitem.l_extendedprice), avg(lineitem.l_discount), count(Int64(1)) AS count(*)]] + Projection: lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - lineitem.l_discount) AS __common_expr_1, lineitem.l_quantity, 
lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_tax, lineitem.l_returnflag, lineitem.l_linestatus + Filter: lineitem.l_shipdate <= Date32("1998-09-24") + TableScan: lineitem projection=[l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate], partial_filters=[lineitem.l_shipdate <= Date32("1998-09-24")] DataFusion Physical Plan ======================== SortPreservingMergeExec: [l_returnflag@0 ASC NULLS LAST,l_linestatus@1 ASC NULLS LAST] - SortExec: expr=[l_returnflag@0 ASC NULLS LAST,l_linestatus@1 ASC NULLS LAST] - ProjectionExec: expr=[l_returnflag@0 as l_returnflag, l_linestatus@1 as l_linestatus, SUM(lineitem.l_quantity)@2 as sum_qty, SUM(lineitem.l_extendedprice)@3 as sum_base_price, SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@4 as sum_disc_price, SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax)@5 as sum_charge, AVG(lineitem.l_quantity)@6 as avg_qty, AVG(lineitem.l_extendedprice)@7 as avg_price, AVG(lineitem.l_discount)@8 as avg_disc, COUNT(UInt8(1))@9 as count_order] - AggregateExec: mode=FinalPartitioned, gby=[l_returnflag@0 as l_returnflag, l_linestatus@1 as l_linestatus], aggr=[SUM(lineitem.l_quantity), SUM(lineitem.l_extendedprice), SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount), SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax), AVG(lineitem.l_quantity), AVG(lineitem.l_extendedprice), AVG(lineitem.l_discount), COUNT(UInt8(1))] + SortExec: expr=[l_returnflag@0 ASC NULLS LAST,l_linestatus@1 ASC NULLS LAST], preserve_partitioning=[true] + ProjectionExec: expr=[l_returnflag@0 as l_returnflag, l_linestatus@1 as l_linestatus, sum(lineitem.l_quantity)@2 as sum_qty, sum(lineitem.l_extendedprice)@3 as sum_base_price, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@4 as sum_disc_price, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax)@5 as sum_charge, 
avg(lineitem.l_quantity)@6 as avg_qty, avg(lineitem.l_extendedprice)@7 as avg_price, avg(lineitem.l_discount)@8 as avg_disc, count(*)@9 as count_order] + AggregateExec: mode=FinalPartitioned, gby=[l_returnflag@0 as l_returnflag, l_linestatus@1 as l_linestatus], aggr=[sum(lineitem.l_quantity), sum(lineitem.l_extendedprice), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax), avg(lineitem.l_quantity), avg(lineitem.l_extendedprice), avg(lineitem.l_discount), count(*)] CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "l_returnflag", index: 0 }, Column { name: "l_linestatus", index: 1 }], 4), input_partitions=4 - AggregateExec: mode=Partial, gby=[l_returnflag@5 as l_returnflag, l_linestatus@6 as l_linestatus], aggr=[SUM(lineitem.l_quantity), SUM(lineitem.l_extendedprice), SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount), SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax), AVG(lineitem.l_quantity), AVG(lineitem.l_extendedprice), AVG(lineitem.l_discount), COUNT(UInt8(1))] - ProjectionExec: expr=[CAST(l_extendedprice@1 AS Decimal128(35, 4)) * CAST(Some(100),23,2 - CAST(l_discount@2 AS Decimal128(23, 2)) AS Decimal128(35, 4)) as CAST(lineitem.l_extendedprice AS Decimal128(35, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(35, 4))CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(35, 4))Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2))CAST(lineitem.l_discount AS Decimal128(23, 2))lineitem.l_discountDecimal128(Some(100),23,2)CAST(lineitem.l_extendedprice AS Decimal128(35, 4))lineitem.l_extendedprice, l_quantity@0 as l_quantity, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, l_tax@3 as l_tax, l_returnflag@4 as l_returnflag, 
l_linestatus@5 as l_linestatus] + RepartitionExec: partitioning=Hash([l_returnflag@0, l_linestatus@1], 4), input_partitions=4 + AggregateExec: mode=Partial, gby=[l_returnflag@5 as l_returnflag, l_linestatus@6 as l_linestatus], aggr=[sum(lineitem.l_quantity), sum(lineitem.l_extendedprice), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax), avg(lineitem.l_quantity), avg(lineitem.l_extendedprice), avg(lineitem.l_discount), count(*)] + ProjectionExec: expr=[l_extendedprice@1 * (Some(1),20,0 - l_discount@2) as __common_expr_2, l_quantity@0 as l_quantity, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, l_tax@3 as l_tax, l_returnflag@4 as l_returnflag, l_linestatus@5 as l_linestatus] CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_shipdate@6 <= 10493 - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-17.parquet, 
mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-3.parquet]]}, predicate=l_shipdate@10 <= 10493, pruning_predicate=l_shipdate_min@0 <= 10493, projection=[l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate] + FilterExec: l_shipdate@6 <= 1998-09-24 + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate], predicate=l_shipdate@10 <= 1998-09-24, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@0 <= 1998-09-24 END, required_guarantees=[] RaySQL Plan =========== Query Stage #0 (4 -> 4): ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "l_returnflag", index: 0 }, Column { name: "l_linestatus", index: 1 }], 4)) - AggregateExec: mode=Partial, gby=[l_returnflag@5 as l_returnflag, l_linestatus@6 as l_linestatus], aggr=[SUM(lineitem.l_quantity), SUM(lineitem.l_extendedprice), SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount), SUM(lineitem.l_extendedprice * Int64(1) 
- lineitem.l_discount * Int64(1) + lineitem.l_tax), AVG(lineitem.l_quantity), AVG(lineitem.l_extendedprice), AVG(lineitem.l_discount), COUNT(UInt8(1))] - ProjectionExec: expr=[CAST(l_extendedprice@1 AS Decimal128(35, 4)) * CAST(Some(100),23,2 - CAST(l_discount@2 AS Decimal128(23, 2)) AS Decimal128(35, 4)) as CAST(lineitem.l_extendedprice AS Decimal128(35, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(35, 4))CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(35, 4))Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2))CAST(lineitem.l_discount AS Decimal128(23, 2))lineitem.l_discountDecimal128(Some(100),23,2)CAST(lineitem.l_extendedprice AS Decimal128(35, 4))lineitem.l_extendedprice, l_quantity@0 as l_quantity, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, l_tax@3 as l_tax, l_returnflag@4 as l_returnflag, l_linestatus@5 as l_linestatus] + AggregateExec: mode=Partial, gby=[l_returnflag@5 as l_returnflag, l_linestatus@6 as l_linestatus], aggr=[sum(lineitem.l_quantity), sum(lineitem.l_extendedprice), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax), avg(lineitem.l_quantity), avg(lineitem.l_extendedprice), avg(lineitem.l_discount), count(*)] + ProjectionExec: expr=[l_extendedprice@1 * (Some(1),20,0 - l_discount@2) as __common_expr_2, l_quantity@0 as l_quantity, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, l_tax@3 as l_tax, l_returnflag@4 as l_returnflag, l_linestatus@5 as l_linestatus] CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_shipdate@6 <= 10493 - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-11.parquet, 
mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-3.parquet]]}, predicate=l_shipdate@10 <= 10493, pruning_predicate=l_shipdate_min@0 <= 10493, projection=[l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate] + FilterExec: l_shipdate@6 <= 1998-09-24 + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], 
[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate], predicate=l_shipdate@10 <= 1998-09-24, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@0 <= 1998-09-24 END, required_guarantees=[] Query Stage #1 (4 -> 4): ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "l_returnflag", index: 0 }, Column { name: "l_linestatus", index: 1 }], 4)) - SortExec: expr=[l_returnflag@0 ASC NULLS LAST,l_linestatus@1 ASC NULLS LAST] - ProjectionExec: expr=[l_returnflag@0 as l_returnflag, l_linestatus@1 as l_linestatus, SUM(lineitem.l_quantity)@2 as sum_qty, SUM(lineitem.l_extendedprice)@3 as sum_base_price, SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@4 as sum_disc_price, SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax)@5 as sum_charge, AVG(lineitem.l_quantity)@6 as avg_qty, AVG(lineitem.l_extendedprice)@7 as avg_price, AVG(lineitem.l_discount)@8 as avg_disc, COUNT(UInt8(1))@9 as count_order] - AggregateExec: mode=FinalPartitioned, gby=[l_returnflag@0 as l_returnflag, l_linestatus@1 as l_linestatus], aggr=[SUM(lineitem.l_quantity), SUM(lineitem.l_extendedprice), SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount), SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax), AVG(lineitem.l_quantity), AVG(lineitem.l_extendedprice), AVG(lineitem.l_discount), COUNT(UInt8(1))] + SortExec: expr=[l_returnflag@0 ASC NULLS LAST,l_linestatus@1 ASC NULLS LAST], preserve_partitioning=[true] + ProjectionExec: expr=[l_returnflag@0 as l_returnflag, l_linestatus@1 as l_linestatus, sum(lineitem.l_quantity)@2 as sum_qty, sum(lineitem.l_extendedprice)@3 as sum_base_price, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@4 as sum_disc_price, 
sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax)@5 as sum_charge, avg(lineitem.l_quantity)@6 as avg_qty, avg(lineitem.l_extendedprice)@7 as avg_price, avg(lineitem.l_discount)@8 as avg_disc, count(*)@9 as count_order] + AggregateExec: mode=FinalPartitioned, gby=[l_returnflag@0 as l_returnflag, l_linestatus@1 as l_linestatus], aggr=[sum(lineitem.l_quantity), sum(lineitem.l_extendedprice), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax), avg(lineitem.l_quantity), avg(lineitem.l_extendedprice), avg(lineitem.l_discount), count(*)] CoalesceBatchesExec: target_batch_size=8192 ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "l_returnflag", index: 0 }, Column { name: "l_linestatus", index: 1 }], 4)) diff --git a/testdata/expected-plans/q10.txt b/testdata/expected-plans/q10.txt index 039c96f..607f1d7 100644 --- a/testdata/expected-plans/q10.txt +++ b/testdata/expected-plans/q10.txt @@ -3,8 +3,8 @@ DataFusion Logical Plan Limit: skip=0, fetch=20 Sort: revenue DESC NULLS FIRST, fetch=20 - Projection: customer.c_custkey, customer.c_name, SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS revenue, customer.c_acctbal, nation.n_name, customer.c_address, customer.c_phone, customer.c_comment - Aggregate: groupBy=[[customer.c_custkey, customer.c_name, customer.c_acctbal, customer.c_phone, nation.n_name, customer.c_address, customer.c_comment]], aggr=[[SUM(CAST(lineitem.l_extendedprice AS Decimal128(35, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(35, 4))) AS SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]] + Projection: customer.c_custkey, customer.c_name, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS revenue, customer.c_acctbal, nation.n_name, customer.c_address, customer.c_phone, customer.c_comment + 
Aggregate: groupBy=[[customer.c_custkey, customer.c_name, customer.c_acctbal, customer.c_phone, nation.n_name, customer.c_address, customer.c_comment]], aggr=[[sum(lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - lineitem.l_discount)) AS sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]] Projection: customer.c_custkey, customer.c_name, customer.c_address, customer.c_phone, customer.c_acctbal, customer.c_comment, lineitem.l_extendedprice, lineitem.l_discount, nation.n_name Inner Join: customer.c_nationkey = nation.n_nationkey Projection: customer.c_custkey, customer.c_name, customer.c_address, customer.c_nationkey, customer.c_phone, customer.c_acctbal, customer.c_comment, lineitem.l_extendedprice, lineitem.l_discount @@ -13,8 +13,8 @@ Limit: skip=0, fetch=20 Inner Join: customer.c_custkey = orders.o_custkey TableScan: customer projection=[c_custkey, c_name, c_address, c_nationkey, c_phone, c_acctbal, c_comment] Projection: orders.o_orderkey, orders.o_custkey - Filter: orders.o_orderdate >= Date32("8582") AND orders.o_orderdate < Date32("8674") - TableScan: orders projection=[o_orderkey, o_custkey, o_orderdate], partial_filters=[orders.o_orderdate >= Date32("8582"), orders.o_orderdate < Date32("8674")] + Filter: orders.o_orderdate >= Date32("1993-07-01") AND orders.o_orderdate < Date32("1993-10-01") + TableScan: orders projection=[o_orderkey, o_custkey, o_orderdate], partial_filters=[orders.o_orderdate >= Date32("1993-07-01"), orders.o_orderdate < Date32("1993-10-01")] Projection: lineitem.l_orderkey, lineitem.l_extendedprice, lineitem.l_discount Filter: lineitem.l_returnflag = Utf8("R") TableScan: lineitem projection=[l_orderkey, l_extendedprice, l_discount, l_returnflag], partial_filters=[lineitem.l_returnflag = Utf8("R")] @@ -24,111 +24,107 @@ DataFusion Physical Plan ======================== GlobalLimitExec: skip=0, fetch=20 - SortPreservingMergeExec: [revenue@2 DESC] - SortExec: fetch=20, expr=[revenue@2 DESC] - ProjectionExec: 
expr=[c_custkey@0 as c_custkey, c_name@1 as c_name, SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@7 as revenue, c_acctbal@2 as c_acctbal, n_name@4 as n_name, c_address@5 as c_address, c_phone@3 as c_phone, c_comment@6 as c_comment] - AggregateExec: mode=FinalPartitioned, gby=[c_custkey@0 as c_custkey, c_name@1 as c_name, c_acctbal@2 as c_acctbal, c_phone@3 as c_phone, n_name@4 as n_name, c_address@5 as c_address, c_comment@6 as c_comment], aggr=[SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] + SortPreservingMergeExec: [revenue@2 DESC], fetch=20 + SortExec: TopK(fetch=20), expr=[revenue@2 DESC], preserve_partitioning=[true] + ProjectionExec: expr=[c_custkey@0 as c_custkey, c_name@1 as c_name, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@7 as revenue, c_acctbal@2 as c_acctbal, n_name@4 as n_name, c_address@5 as c_address, c_phone@3 as c_phone, c_comment@6 as c_comment] + AggregateExec: mode=FinalPartitioned, gby=[c_custkey@0 as c_custkey, c_name@1 as c_name, c_acctbal@2 as c_acctbal, c_phone@3 as c_phone, n_name@4 as n_name, c_address@5 as c_address, c_comment@6 as c_comment], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "c_custkey", index: 0 }, Column { name: "c_name", index: 1 }, Column { name: "c_acctbal", index: 2 }, Column { name: "c_phone", index: 3 }, Column { name: "n_name", index: 4 }, Column { name: "c_address", index: 5 }, Column { name: "c_comment", index: 6 }], 4), input_partitions=4 - AggregateExec: mode=Partial, gby=[c_custkey@0 as c_custkey, c_name@1 as c_name, c_acctbal@4 as c_acctbal, c_phone@3 as c_phone, n_name@8 as n_name, c_address@2 as c_address, c_comment@5 as c_comment], aggr=[SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] - ProjectionExec: expr=[c_custkey@0 as c_custkey, c_name@1 as c_name, c_address@2 as c_address, c_phone@4 as c_phone, c_acctbal@5 as 
c_acctbal, c_comment@6 as c_comment, l_extendedprice@7 as l_extendedprice, l_discount@8 as l_discount, n_name@10 as n_name] + RepartitionExec: partitioning=Hash([c_custkey@0, c_name@1, c_acctbal@2, c_phone@3, n_name@4, c_address@5, c_comment@6], 4), input_partitions=4 + AggregateExec: mode=Partial, gby=[c_custkey@0 as c_custkey, c_name@1 as c_name, c_acctbal@4 as c_acctbal, c_phone@3 as c_phone, n_name@8 as n_name, c_address@2 as c_address, c_comment@5 as c_comment], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] + ProjectionExec: expr=[c_custkey@1 as c_custkey, c_name@2 as c_name, c_address@3 as c_address, c_phone@4 as c_phone, c_acctbal@5 as c_acctbal, c_comment@6 as c_comment, l_extendedprice@7 as l_extendedprice, l_discount@8 as l_discount, n_name@0 as n_name] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "c_nationkey", index: 3 }, Column { name: "n_nationkey", index: 0 })] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, c_nationkey@3)], projection=[n_name@1, c_custkey@2, c_name@3, c_address@4, c_phone@6, c_acctbal@7, c_comment@8, l_extendedprice@9, l_discount@10] CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "c_nationkey", index: 3 }], 4), input_partitions=4 - ProjectionExec: expr=[c_custkey@0 as c_custkey, c_name@1 as c_name, c_address@2 as c_address, c_nationkey@3 as c_nationkey, c_phone@4 as c_phone, c_acctbal@5 as c_acctbal, c_comment@6 as c_comment, l_extendedprice@9 as l_extendedprice, l_discount@10 as l_discount] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "o_orderkey", index: 7 }, Column { name: "l_orderkey", index: 0 })] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "o_orderkey", index: 7 }], 4), input_partitions=4 - ProjectionExec: expr=[c_custkey@0 as c_custkey, 
c_name@1 as c_name, c_address@2 as c_address, c_nationkey@3 as c_nationkey, c_phone@4 as c_phone, c_acctbal@5 as c_acctbal, c_comment@6 as c_comment, o_orderkey@7 as o_orderkey] + RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=1 + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([c_nationkey@3], 4), input_partitions=4 + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@7, l_orderkey@0)], projection=[c_custkey@0, c_name@1, c_address@2, c_nationkey@3, c_phone@4, c_acctbal@5, c_comment@6, l_extendedprice@9, l_discount@10] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([o_orderkey@7], 4), input_partitions=4 + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, c_name@1, c_address@2, c_nationkey@3, c_phone@4, c_acctbal@5, c_comment@6, o_orderkey@7] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "c_custkey", index: 0 }, Column { name: "o_custkey", index: 1 })] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4), input_partitions=4 - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-20.parquet, 
mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-3.parquet]]}, projection=[c_custkey, c_name, c_address, c_nationkey, c_phone, c_acctbal, c_comment] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "o_custkey", index: 1 }], 4), input_partitions=4 - ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: o_orderdate@2 >= 8582 AND o_orderdate@2 < 8674 - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-20.parquet, 
mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-3.parquet]]}, predicate=o_orderdate@4 >= 8582 AND o_orderdate@4 < 8674, pruning_predicate=o_orderdate_max@0 >= 8582 AND o_orderdate_min@1 < 8674, projection=[o_orderkey, o_custkey, o_orderdate] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4), input_partitions=4 - ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] + RepartitionExec: partitioning=Hash([c_custkey@0], 4), input_partitions=4 + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:0..3391370], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:3391370..6782740], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:6782740..6784675, home/edmondo/Development/datafusion-ray/data/customer.parquet/part2.parquet:0..3389435], 
[home/edmondo/Development/datafusion-ray/data/customer.parquet/part2.parquet:3389435..6780803]]}, projection=[c_custkey, c_name, c_address, c_nationkey, c_phone, c_acctbal, c_comment] CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_returnflag@3 = R - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-3.parquet]]}, predicate=l_returnflag@8 = R, pruning_predicate=l_returnflag_min@0 <= R AND R <= l_returnflag_max@1, projection=[l_orderkey, l_extendedprice, 
l_discount, l_returnflag] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4), input_partitions=1 - ParquetExec: limit=None, partitions={1 group: [[mnt/bigdata/tpch/sf10-parquet/nation.parquet/part-0.parquet]]}, projection=[n_nationkey, n_name] + RepartitionExec: partitioning=Hash([o_custkey@1], 4), input_partitions=4 + ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: o_orderdate@2 >= 1993-07-01 AND o_orderdate@2 < 1993-10-01 + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1993-07-01 AND o_orderdate@4 < 1993-10-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1993-07-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1993-10-01 END, required_guarantees=[] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4 + ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: l_returnflag@3 = R + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], 
[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_extendedprice, l_discount, l_returnflag], predicate=l_returnflag@8 = R, pruning_predicate=CASE WHEN l_returnflag_null_count@2 = l_returnflag_row_count@3 THEN false ELSE l_returnflag_min@0 <= R AND R <= l_returnflag_max@1 END, required_guarantees=[l_returnflag in (R)] RaySQL Plan =========== -Query Stage #0 (4 -> 4): -ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-9.parquet, 
mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-3.parquet]]}, projection=[c_custkey, c_name, c_address, c_nationkey, c_phone, c_acctbal, c_comment] +Query Stage #0 (1 -> 4): +ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name] Query Stage #1 (4 -> 4): -ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "o_custkey", index: 1 }], 4)) - ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: o_orderdate@2 >= 8582 AND o_orderdate@2 < 8674 - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-21.parquet, 
mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-3.parquet]]}, predicate=o_orderdate@4 >= 8582 AND o_orderdate@4 < 8674, pruning_predicate=o_orderdate_max@0 >= 8582 AND o_orderdate_min@1 < 8674, projection=[o_orderkey, o_custkey, o_orderdate] +ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:0..3391370], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:3391370..6782740], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:6782740..6784675, home/edmondo/Development/datafusion-ray/data/customer.parquet/part2.parquet:0..3389435], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part2.parquet:3389435..6780803]]}, projection=[c_custkey, c_name, c_address, c_nationkey, c_phone, c_acctbal, c_comment] Query Stage #2 (4 -> 4): -ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "o_orderkey", index: 7 }], 4)) - ProjectionExec: expr=[c_custkey@0 as c_custkey, c_name@1 as c_name, c_address@2 as c_address, c_nationkey@3 as c_nationkey, c_phone@4 as c_phone, c_acctbal@5 as c_acctbal, c_comment@6 as c_comment, o_orderkey@7 as o_orderkey] +ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "o_custkey", 
index: 1 }], 4)) + ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "c_custkey", index: 0 }, Column { name: "o_custkey", index: 1 })] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "o_custkey", index: 1 }], 4)) + FilterExec: o_orderdate@2 >= 1993-07-01 AND o_orderdate@2 < 1993-10-01 + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1993-07-01 AND o_orderdate@4 < 1993-10-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1993-07-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1993-10-01 END, required_guarantees=[] Query Stage #3 (4 -> 4): -ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) - ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_returnflag@3 = R - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-5.parquet, 
mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-3.parquet]]}, predicate=l_returnflag@8 = R, pruning_predicate=l_returnflag_min@0 <= R AND R <= l_returnflag_max@1, projection=[l_orderkey, l_extendedprice, l_discount, l_returnflag] +ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "o_orderkey", index: 7 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, c_name@1, c_address@2, c_nationkey@3, c_phone@4, c_acctbal@5, c_comment@6, o_orderkey@7] + 
CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "o_custkey", index: 1 }], 4)) Query Stage #4 (4 -> 4): -ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "c_nationkey", index: 3 }], 4)) - ProjectionExec: expr=[c_custkey@0 as c_custkey, c_name@1 as c_name, c_address@2 as c_address, c_nationkey@3 as c_nationkey, c_phone@4 as c_phone, c_acctbal@5 as c_acctbal, c_comment@6 as c_comment, l_extendedprice@9 as l_extendedprice, l_discount@10 as l_discount] +ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) + ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "o_orderkey", index: 7 }, Column { name: "l_orderkey", index: 0 })] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "o_orderkey", index: 7 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) + FilterExec: l_returnflag@3 = R + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_extendedprice, l_discount, 
l_returnflag], predicate=l_returnflag@8 = R, pruning_predicate=CASE WHEN l_returnflag_null_count@2 = l_returnflag_row_count@3 THEN false ELSE l_returnflag_min@0 <= R AND R <= l_returnflag_max@1 END, required_guarantees=[l_returnflag in (R)] -Query Stage #5 (1 -> 4): -ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) - ParquetExec: limit=None, partitions={1 group: [[mnt/bigdata/tpch/sf10-parquet/nation.parquet/part-0.parquet]]}, projection=[n_nationkey, n_name] +Query Stage #5 (4 -> 4): +ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "c_nationkey", index: 3 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@7, l_orderkey@0)], projection=[c_custkey@0, c_name@1, c_address@2, c_nationkey@3, c_phone@4, c_acctbal@5, c_comment@6, l_extendedprice@9, l_discount@10] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "o_orderkey", index: 7 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=4, input_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) Query Stage #6 (4 -> 4): ShuffleWriterExec(stage_id=6, output_partitioning=Hash([Column { name: "c_custkey", index: 0 }, Column { name: "c_name", index: 1 }, Column { name: "c_acctbal", index: 2 }, Column { name: "c_phone", index: 3 }, Column { name: "n_name", index: 4 }, Column { name: "c_address", index: 5 }, Column { name: "c_comment", index: 6 }], 4)) - AggregateExec: mode=Partial, gby=[c_custkey@0 as c_custkey, c_name@1 as c_name, c_acctbal@4 as c_acctbal, c_phone@3 as c_phone, n_name@8 as n_name, c_address@2 as c_address, c_comment@5 as c_comment], aggr=[SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] - ProjectionExec: expr=[c_custkey@0 as c_custkey, c_name@1 as c_name, c_address@2 as c_address, c_phone@4 as c_phone, c_acctbal@5 as c_acctbal, c_comment@6 
as c_comment, l_extendedprice@7 as l_extendedprice, l_discount@8 as l_discount, n_name@10 as n_name] + AggregateExec: mode=Partial, gby=[c_custkey@0 as c_custkey, c_name@1 as c_name, c_acctbal@4 as c_acctbal, c_phone@3 as c_phone, n_name@8 as n_name, c_address@2 as c_address, c_comment@5 as c_comment], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] + ProjectionExec: expr=[c_custkey@1 as c_custkey, c_name@2 as c_name, c_address@3 as c_address, c_phone@4 as c_phone, c_acctbal@5 as c_acctbal, c_comment@6 as c_comment, l_extendedprice@7 as l_extendedprice, l_discount@8 as l_discount, n_name@0 as n_name] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "c_nationkey", index: 3 }, Column { name: "n_nationkey", index: 0 })] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, c_nationkey@3)], projection=[n_name@1, c_custkey@2, c_name@3, c_address@4, c_phone@6, c_acctbal@7, c_comment@8, l_extendedprice@9, l_discount@10] CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=4, input_partitioning=Hash([Column { name: "c_nationkey", index: 3 }], 4)) + ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=5, input_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) + ShuffleReaderExec(stage_id=5, input_partitioning=Hash([Column { name: "c_nationkey", index: 3 }], 4)) Query Stage #7 (4 -> 4): ShuffleWriterExec(stage_id=7, output_partitioning=Hash([Column { name: "c_custkey", index: 0 }, Column { name: "c_name", index: 1 }, Column { name: "c_acctbal", index: 3 }, Column { name: "c_phone", index: 6 }, Column { name: "n_name", index: 4 }, Column { name: "c_address", index: 5 }, Column { name: "c_comment", index: 7 }], 4)) - SortExec: fetch=20, expr=[revenue@2 DESC] - ProjectionExec: expr=[c_custkey@0 as c_custkey, c_name@1 as 
c_name, SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@7 as revenue, c_acctbal@2 as c_acctbal, n_name@4 as n_name, c_address@5 as c_address, c_phone@3 as c_phone, c_comment@6 as c_comment] - AggregateExec: mode=FinalPartitioned, gby=[c_custkey@0 as c_custkey, c_name@1 as c_name, c_acctbal@2 as c_acctbal, c_phone@3 as c_phone, n_name@4 as n_name, c_address@5 as c_address, c_comment@6 as c_comment], aggr=[SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] + SortExec: TopK(fetch=20), expr=[revenue@2 DESC], preserve_partitioning=[true] + ProjectionExec: expr=[c_custkey@0 as c_custkey, c_name@1 as c_name, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@7 as revenue, c_acctbal@2 as c_acctbal, n_name@4 as n_name, c_address@5 as c_address, c_phone@3 as c_phone, c_comment@6 as c_comment] + AggregateExec: mode=FinalPartitioned, gby=[c_custkey@0 as c_custkey, c_name@1 as c_name, c_acctbal@2 as c_acctbal, c_phone@3 as c_phone, n_name@4 as n_name, c_address@5 as c_address, c_comment@6 as c_comment], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] CoalesceBatchesExec: target_batch_size=8192 ShuffleReaderExec(stage_id=6, input_partitioning=Hash([Column { name: "c_custkey", index: 0 }, Column { name: "c_name", index: 1 }, Column { name: "c_acctbal", index: 2 }, Column { name: "c_phone", index: 3 }, Column { name: "n_name", index: 4 }, Column { name: "c_address", index: 5 }, Column { name: "c_comment", index: 6 }], 4)) Query Stage #8 (1 -> 1): GlobalLimitExec: skip=0, fetch=20 - SortPreservingMergeExec: [revenue@2 DESC] + SortPreservingMergeExec: [revenue@2 DESC], fetch=20 ShuffleReaderExec(stage_id=7, input_partitioning=Hash([Column { name: "c_custkey", index: 0 }, Column { name: "c_name", index: 1 }, Column { name: "c_acctbal", index: 3 }, Column { name: "c_phone", index: 6 }, Column { name: "n_name", index: 4 }, Column { name: "c_address", index: 5 }, Column { name: "c_comment", index: 7 }], 4)) diff --git 
a/testdata/expected-plans/q11.txt b/testdata/expected-plans/q11.txt index 98c619a..ff6c98f 100644 --- a/testdata/expected-plans/q11.txt +++ b/testdata/expected-plans/q11.txt @@ -2,187 +2,176 @@ DataFusion Logical Plan ======================= Sort: value DESC NULLS FIRST - Projection: partsupp.ps_partkey, SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS value - Filter: CAST(SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS Decimal128(38, 15)) > CAST(__scalar_sq_1.__value AS Decimal128(38, 15)) - CrossJoin: - Aggregate: groupBy=[[partsupp.ps_partkey]], aggr=[[SUM(CAST(partsupp.ps_supplycost AS Decimal128(22, 2)) * CAST(partsupp.ps_availqty AS Decimal128(22, 2)))]] - Projection: partsupp.ps_partkey, partsupp.ps_availqty, partsupp.ps_supplycost - Inner Join: supplier.s_nationkey = nation.n_nationkey - Projection: partsupp.ps_partkey, partsupp.ps_availqty, partsupp.ps_supplycost, supplier.s_nationkey - Inner Join: partsupp.ps_suppkey = supplier.s_suppkey - TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_availqty, ps_supplycost] - TableScan: supplier projection=[s_suppkey, s_nationkey] - Projection: nation.n_nationkey - Filter: nation.n_name = Utf8("ALGERIA") - TableScan: nation projection=[n_nationkey, n_name], partial_filters=[nation.n_name = Utf8("ALGERIA")] - SubqueryAlias: __scalar_sq_1 - Projection: CAST(SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS Float64) * Float64(0.0001) AS __value - Aggregate: groupBy=[[]], aggr=[[SUM(CAST(partsupp.ps_supplycost AS Decimal128(22, 2)) * CAST(partsupp.ps_availqty AS Decimal128(22, 2)))]] - Projection: partsupp.ps_availqty, partsupp.ps_supplycost - Inner Join: supplier.s_nationkey = nation.n_nationkey - Projection: partsupp.ps_availqty, partsupp.ps_supplycost, supplier.s_nationkey - Inner Join: partsupp.ps_suppkey = supplier.s_suppkey - TableScan: partsupp projection=[ps_suppkey, ps_availqty, ps_supplycost] - TableScan: supplier projection=[s_suppkey, s_nationkey] - Projection: nation.n_nationkey - 
Filter: nation.n_name = Utf8("ALGERIA") - TableScan: nation projection=[n_nationkey, n_name], partial_filters=[nation.n_name = Utf8("ALGERIA")] + Projection: partsupp.ps_partkey, sum(partsupp.ps_supplycost * partsupp.ps_availqty) AS value + Inner Join: Filter: CAST(sum(partsupp.ps_supplycost * partsupp.ps_availqty) AS Decimal128(38, 15)) > __scalar_sq_1.sum(partsupp.ps_supplycost * partsupp.ps_availqty) * Float64(0.0001) + Aggregate: groupBy=[[partsupp.ps_partkey]], aggr=[[sum(partsupp.ps_supplycost * CAST(partsupp.ps_availqty AS Decimal128(10, 0)))]] + Projection: partsupp.ps_partkey, partsupp.ps_availqty, partsupp.ps_supplycost + Inner Join: supplier.s_nationkey = nation.n_nationkey + Projection: partsupp.ps_partkey, partsupp.ps_availqty, partsupp.ps_supplycost, supplier.s_nationkey + Inner Join: partsupp.ps_suppkey = supplier.s_suppkey + TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_availqty, ps_supplycost] + TableScan: supplier projection=[s_suppkey, s_nationkey] + Projection: nation.n_nationkey + Filter: nation.n_name = Utf8("ALGERIA") + TableScan: nation projection=[n_nationkey, n_name], partial_filters=[nation.n_name = Utf8("ALGERIA")] + SubqueryAlias: __scalar_sq_1 + Projection: CAST(CAST(sum(partsupp.ps_supplycost * partsupp.ps_availqty) AS Float64) * Float64(0.0001) AS Decimal128(38, 15)) + Aggregate: groupBy=[[]], aggr=[[sum(partsupp.ps_supplycost * CAST(partsupp.ps_availqty AS Decimal128(10, 0)))]] + Projection: partsupp.ps_availqty, partsupp.ps_supplycost + Inner Join: supplier.s_nationkey = nation.n_nationkey + Projection: partsupp.ps_availqty, partsupp.ps_supplycost, supplier.s_nationkey + Inner Join: partsupp.ps_suppkey = supplier.s_suppkey + TableScan: partsupp projection=[ps_suppkey, ps_availqty, ps_supplycost] + TableScan: supplier projection=[s_suppkey, s_nationkey] + Projection: nation.n_nationkey + Filter: nation.n_name = Utf8("ALGERIA") + TableScan: nation projection=[n_nationkey, n_name], partial_filters=[nation.n_name = 
Utf8("ALGERIA")] DataFusion Physical Plan ======================== -SortExec: expr=[value@1 DESC] - ProjectionExec: expr=[ps_partkey@0 as ps_partkey, SUM(partsupp.ps_supplycost * partsupp.ps_availqty)@1 as value] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: CAST(SUM(partsupp.ps_supplycost * partsupp.ps_availqty)@1 AS Decimal128(38, 15)) > CAST(__value@2 AS Decimal128(38, 15)) - CrossJoinExec - CoalescePartitionsExec - AggregateExec: mode=FinalPartitioned, gby=[ps_partkey@0 as ps_partkey], aggr=[SUM(partsupp.ps_supplycost * partsupp.ps_availqty)] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 4), input_partitions=4 - AggregateExec: mode=Partial, gby=[ps_partkey@0 as ps_partkey], aggr=[SUM(partsupp.ps_supplycost * partsupp.ps_availqty)] - ProjectionExec: expr=[ps_partkey@0 as ps_partkey, ps_availqty@1 as ps_availqty, ps_supplycost@2 as ps_supplycost] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "s_nationkey", index: 3 }, Column { name: "n_nationkey", index: 0 })] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "s_nationkey", index: 3 }], 4), input_partitions=4 - ProjectionExec: expr=[ps_partkey@0 as ps_partkey, ps_availqty@2 as ps_availqty, ps_supplycost@3 as ps_supplycost, s_nationkey@5 as s_nationkey] - ProjectionExec: expr=[ps_partkey@2 as ps_partkey, ps_suppkey@3 as ps_suppkey, ps_availqty@4 as ps_availqty, ps_supplycost@5 as ps_supplycost, s_suppkey@0 as s_suppkey, s_nationkey@1 as s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "s_suppkey", index: 0 }, Column { name: "ps_suppkey", index: 1 })] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4), input_partitions=4 - ParquetExec: limit=None, 
partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-3.parquet]]}, projection=[s_suppkey, s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "ps_suppkey", index: 1 }], 4), input_partitions=4 - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-11.parquet, 
mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-3.parquet]]}, projection=[ps_partkey, ps_suppkey, ps_availqty, ps_supplycost] +SortPreservingMergeExec: [value@1 DESC] + SortExec: expr=[value@1 DESC], preserve_partitioning=[true] + ProjectionExec: expr=[ps_partkey@1 as ps_partkey, sum(partsupp.ps_supplycost * partsupp.ps_availqty)@2 as value] + NestedLoopJoinExec: join_type=Inner, filter=CAST(sum(partsupp.ps_supplycost * partsupp.ps_availqty)@0 AS Decimal128(38, 15)) > sum(partsupp.ps_supplycost * partsupp.ps_availqty) * Float64(0.0001)@1 + ProjectionExec: expr=[CAST(CAST(sum(partsupp.ps_supplycost * partsupp.ps_availqty)@0 AS Float64) * 0.0001 AS Decimal128(38, 15)) as sum(partsupp.ps_supplycost * 
partsupp.ps_availqty) * Float64(0.0001)] + AggregateExec: mode=Final, gby=[], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] + CoalescePartitionsExec + AggregateExec: mode=Partial, gby=[], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[ps_availqty@1, ps_supplycost@2] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=4 + RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 + ProjectionExec: expr=[n_nationkey@0 as n_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: n_name@1 = ALGERIA + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ALGERIA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ALGERIA AND ALGERIA <= n_name_max@1 END, required_guarantees=[n_name in (ALGERIA)] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([s_nationkey@2], 4), input_partitions=4 + ProjectionExec: expr=[ps_availqty@1 as ps_availqty, ps_supplycost@2 as ps_supplycost, s_nationkey@0 as s_nationkey] CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4), input_partitions=4 - RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 - ProjectionExec: expr=[n_nationkey@0 as n_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: n_name@1 = ALGERIA - ParquetExec: limit=None, partitions={1 group: [[mnt/bigdata/tpch/sf10-parquet/nation.parquet/part-0.parquet]]}, predicate=n_name@1 = ALGERIA, pruning_predicate=n_name_min@0 <= ALGERIA AND ALGERIA <= n_name_max@1, projection=[n_nationkey, n_name] - ProjectionExec: 
expr=[CAST(SUM(partsupp.ps_supplycost * partsupp.ps_availqty)@0 AS Float64) * 0.0001 as __value] - AggregateExec: mode=Final, gby=[], aggr=[SUM(partsupp.ps_supplycost * partsupp.ps_availqty)] - CoalescePartitionsExec - AggregateExec: mode=Partial, gby=[], aggr=[SUM(partsupp.ps_supplycost * partsupp.ps_availqty)] - ProjectionExec: expr=[ps_availqty@0 as ps_availqty, ps_supplycost@1 as ps_supplycost] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@0)], projection=[s_nationkey@1, ps_availqty@3, ps_supplycost@4] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=4 + RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 + ParquetExec: file_groups={2 groups: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet/part1.parquet], [home/edmondo/Development/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([ps_suppkey@0], 4), input_partitions=4 + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10449977], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part1.parquet:10449977..20896603, home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:0..3351], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:3351..10453328], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:10453328..20903305]]}, projection=[ps_suppkey, ps_availqty, ps_supplycost] + AggregateExec: mode=FinalPartitioned, gby=[ps_partkey@0 as ps_partkey], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([ps_partkey@0], 4), input_partitions=4 + AggregateExec: mode=Partial, gby=[ps_partkey@0 as ps_partkey], 
aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@3)], projection=[ps_partkey@1, ps_availqty@2, ps_supplycost@3] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=4 + RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 + ProjectionExec: expr=[n_nationkey@0 as n_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: n_name@1 = ALGERIA + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ALGERIA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ALGERIA AND ALGERIA <= n_name_max@1 END, required_guarantees=[n_name in (ALGERIA)] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "s_nationkey", index: 2 }, Column { name: "n_nationkey", index: 0 })] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "s_nationkey", index: 2 }], 4), input_partitions=4 - ProjectionExec: expr=[ps_availqty@1 as ps_availqty, ps_supplycost@2 as ps_supplycost, s_nationkey@4 as s_nationkey] - ProjectionExec: expr=[ps_suppkey@2 as ps_suppkey, ps_availqty@3 as ps_availqty, ps_supplycost@4 as ps_supplycost, s_suppkey@0 as s_suppkey, s_nationkey@1 as s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "s_suppkey", index: 0 }, Column { name: "ps_suppkey", index: 0 })] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4), input_partitions=4 - ParquetExec: limit=None, partitions={4 groups: 
[[mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-3.parquet]]}, projection=[s_suppkey, s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "ps_suppkey", index: 0 }], 4), input_partitions=4 - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-11.parquet, 
mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-3.parquet]]}, projection=[ps_suppkey, ps_availqty, ps_supplycost] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4), input_partitions=4 - RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 - ProjectionExec: expr=[n_nationkey@0 as n_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: n_name@1 = ALGERIA - ParquetExec: limit=None, partitions={1 group: [[mnt/bigdata/tpch/sf10-parquet/nation.parquet/part-0.parquet]]}, predicate=n_name@1 = ALGERIA, pruning_predicate=n_name_min@0 <= ALGERIA AND ALGERIA <= n_name_max@1, projection=[n_nationkey, n_name] + 
RepartitionExec: partitioning=Hash([s_nationkey@3], 4), input_partitions=4 + ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_availqty@2 as ps_availqty, ps_supplycost@3 as ps_supplycost, s_nationkey@0 as s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@1)], projection=[s_nationkey@1, ps_partkey@2, ps_availqty@4, ps_supplycost@5] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=4 + RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 + ParquetExec: file_groups={2 groups: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet/part1.parquet], [home/edmondo/Development/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([ps_suppkey@1], 4), input_partitions=4 + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10449977], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part1.parquet:10449977..20896603, home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:0..3351], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:3351..10453328], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:10453328..20903305]]}, projection=[ps_partkey, ps_suppkey, ps_availqty, ps_supplycost] RaySQL Plan =========== -Query Stage #0 (4 -> 4): -ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-11.parquet, 
mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-3.parquet]]}, projection=[s_suppkey, s_nationkey] +Query Stage #0 (1 -> 4): +ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) + ProjectionExec: expr=[n_nationkey@0 as n_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: n_name@1 = ALGERIA + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ALGERIA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ALGERIA AND ALGERIA <= n_name_max@1 END, required_guarantees=[n_name in 
(ALGERIA)] -Query Stage #1 (4 -> 4): -ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "ps_suppkey", index: 1 }], 4)) - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-3.parquet]]}, projection=[ps_partkey, ps_suppkey, ps_availqty, ps_supplycost] +Query Stage #1 (2 -> 4): +ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) + ParquetExec: file_groups={2 groups: 
[[home/edmondo/Development/datafusion-ray/data/supplier.parquet/part1.parquet], [home/edmondo/Development/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] Query Stage #2 (4 -> 4): -ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "s_nationkey", index: 3 }], 4)) - ProjectionExec: expr=[ps_partkey@0 as ps_partkey, ps_availqty@2 as ps_availqty, ps_supplycost@3 as ps_supplycost, s_nationkey@5 as s_nationkey] - ProjectionExec: expr=[ps_partkey@2 as ps_partkey, ps_suppkey@3 as ps_suppkey, ps_availqty@4 as ps_availqty, ps_supplycost@5 as ps_supplycost, s_suppkey@0 as s_suppkey, s_nationkey@1 as s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "s_suppkey", index: 0 }, Column { name: "ps_suppkey", index: 1 })] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "ps_suppkey", index: 1 }], 4)) +ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "ps_suppkey", index: 0 }], 4)) + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10449977], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part1.parquet:10449977..20896603, home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:0..3351], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:3351..10453328], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:10453328..20903305]]}, projection=[ps_suppkey, ps_availqty, ps_supplycost] -Query Stage #3 (1 -> 4): -ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) +Query Stage #3 (4 -> 4): 
+ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "s_nationkey", index: 2 }], 4)) + ProjectionExec: expr=[ps_availqty@1 as ps_availqty, ps_supplycost@2 as ps_supplycost, s_nationkey@0 as s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@0)], projection=[s_nationkey@1, ps_availqty@3, ps_supplycost@4] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "ps_suppkey", index: 0 }], 4)) + +Query Stage #4 (4 -> 1): +ShuffleWriterExec(stage_id=4, output_partitioning=Hash([], 4)) + AggregateExec: mode=Partial, gby=[], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[ps_availqty@1, ps_supplycost@2] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "s_nationkey", index: 2 }], 4)) + +Query Stage #5 (1 -> 4): +ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) ProjectionExec: expr=[n_nationkey@0 as n_nationkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: n_name@1 = ALGERIA - ParquetExec: limit=None, partitions={1 group: [[mnt/bigdata/tpch/sf10-parquet/nation.parquet/part-0.parquet]]}, predicate=n_name@1 = ALGERIA, pruning_predicate=n_name_min@0 <= ALGERIA AND ALGERIA <= n_name_max@1, projection=[n_nationkey, n_name] - -Query Stage #4 (4 -> 4): -ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "ps_partkey", 
index: 0 }], 4)) - AggregateExec: mode=Partial, gby=[ps_partkey@0 as ps_partkey], aggr=[SUM(partsupp.ps_supplycost * partsupp.ps_availqty)] - ProjectionExec: expr=[ps_partkey@0 as ps_partkey, ps_availqty@1 as ps_availqty, ps_supplycost@2 as ps_supplycost] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "s_nationkey", index: 3 }, Column { name: "n_nationkey", index: 0 })] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "s_nationkey", index: 3 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ALGERIA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ALGERIA AND ALGERIA <= n_name_max@1 END, required_guarantees=[n_name in (ALGERIA)] -Query Stage #5 (4 -> 4): -ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 4)) - AggregateExec: mode=FinalPartitioned, gby=[ps_partkey@0 as ps_partkey], aggr=[SUM(partsupp.ps_supplycost * partsupp.ps_availqty)] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=4, input_partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 4)) - -Query Stage #6 (4 -> 4): +Query Stage #6 (2 -> 4): ShuffleWriterExec(stage_id=6, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-13.parquet, 
mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-3.parquet]]}, projection=[s_suppkey, s_nationkey] + ParquetExec: file_groups={2 groups: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet/part1.parquet], [home/edmondo/Development/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] Query Stage #7 (4 -> 4): -ShuffleWriterExec(stage_id=7, output_partitioning=Hash([Column { name: "ps_suppkey", index: 0 }], 4)) - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-13.parquet, 
mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-3.parquet]]}, projection=[ps_suppkey, ps_availqty, ps_supplycost] +ShuffleWriterExec(stage_id=7, output_partitioning=Hash([Column { name: "ps_suppkey", index: 1 }], 4)) + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10449977], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part1.parquet:10449977..20896603, home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:0..3351], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:3351..10453328], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:10453328..20903305]]}, projection=[ps_partkey, ps_suppkey, ps_availqty, 
ps_supplycost] Query Stage #8 (4 -> 4): -ShuffleWriterExec(stage_id=8, output_partitioning=Hash([Column { name: "s_nationkey", index: 2 }], 4)) - ProjectionExec: expr=[ps_availqty@1 as ps_availqty, ps_supplycost@2 as ps_supplycost, s_nationkey@4 as s_nationkey] - ProjectionExec: expr=[ps_suppkey@2 as ps_suppkey, ps_availqty@3 as ps_availqty, ps_supplycost@4 as ps_supplycost, s_suppkey@0 as s_suppkey, s_nationkey@1 as s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "s_suppkey", index: 0 }, Column { name: "ps_suppkey", index: 0 })] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=6, input_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=7, input_partitioning=Hash([Column { name: "ps_suppkey", index: 0 }], 4)) - -Query Stage #9 (1 -> 4): -ShuffleWriterExec(stage_id=9, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) - ProjectionExec: expr=[n_nationkey@0 as n_nationkey] +ShuffleWriterExec(stage_id=8, output_partitioning=Hash([Column { name: "s_nationkey", index: 3 }], 4)) + ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_availqty@2 as ps_availqty, ps_supplycost@3 as ps_supplycost, s_nationkey@0 as s_nationkey] CoalesceBatchesExec: target_batch_size=8192 - FilterExec: n_name@1 = ALGERIA - ParquetExec: limit=None, partitions={1 group: [[mnt/bigdata/tpch/sf10-parquet/nation.parquet/part-0.parquet]]}, predicate=n_name@1 = ALGERIA, pruning_predicate=n_name_min@0 <= ALGERIA AND ALGERIA <= n_name_max@1, projection=[n_nationkey, n_name] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@1)], projection=[s_nationkey@1, ps_partkey@2, ps_availqty@4, ps_supplycost@5] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=6, input_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) + 
CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=7, input_partitioning=Hash([Column { name: "ps_suppkey", index: 1 }], 4)) + +Query Stage #9 (4 -> 4): +ShuffleWriterExec(stage_id=9, output_partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 4)) + AggregateExec: mode=Partial, gby=[ps_partkey@0 as ps_partkey], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@3)], projection=[ps_partkey@1, ps_availqty@2, ps_supplycost@3] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=5, input_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=8, input_partitioning=Hash([Column { name: "s_nationkey", index: 3 }], 4)) Query Stage #10 (4 -> 4): -ShuffleWriterExec(stage_id=10, output_partitioning=Hash([], 4)) - AggregateExec: mode=Partial, gby=[], aggr=[SUM(partsupp.ps_supplycost * partsupp.ps_availqty)] - ProjectionExec: expr=[ps_availqty@0 as ps_availqty, ps_supplycost@1 as ps_supplycost] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "s_nationkey", index: 2 }, Column { name: "n_nationkey", index: 0 })] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=8, input_partitioning=Hash([Column { name: "s_nationkey", index: 2 }], 4)) +ShuffleWriterExec(stage_id=10, output_partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 4)) + SortExec: expr=[value@1 DESC], preserve_partitioning=[true] + ProjectionExec: expr=[ps_partkey@1 as ps_partkey, sum(partsupp.ps_supplycost * partsupp.ps_availqty)@2 as value] + NestedLoopJoinExec: join_type=Inner, filter=CAST(sum(partsupp.ps_supplycost * partsupp.ps_availqty)@0 AS Decimal128(38, 15)) > sum(partsupp.ps_supplycost * partsupp.ps_availqty) * Float64(0.0001)@1 + 
ProjectionExec: expr=[CAST(CAST(sum(partsupp.ps_supplycost * partsupp.ps_availqty)@0 AS Float64) * 0.0001 AS Decimal128(38, 15)) as sum(partsupp.ps_supplycost * partsupp.ps_availqty) * Float64(0.0001)] + AggregateExec: mode=Final, gby=[], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] + CoalescePartitionsExec + ShuffleReaderExec(stage_id=4, input_partitioning=Hash([], 4)) + AggregateExec: mode=FinalPartitioned, gby=[ps_partkey@0 as ps_partkey], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=9, input_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) + ShuffleReaderExec(stage_id=9, input_partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 4)) -Query Stage #11 (1 -> 1): -SortExec: expr=[value@1 DESC] - ProjectionExec: expr=[ps_partkey@0 as ps_partkey, SUM(partsupp.ps_supplycost * partsupp.ps_availqty)@1 as value] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: CAST(SUM(partsupp.ps_supplycost * partsupp.ps_availqty)@1 AS Decimal128(38, 15)) > CAST(__value@2 AS Decimal128(38, 15)) - CrossJoinExec - CoalescePartitionsExec - ShuffleReaderExec(stage_id=5, input_partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 4)) - ProjectionExec: expr=[CAST(SUM(partsupp.ps_supplycost * partsupp.ps_availqty)@0 AS Float64) * 0.0001 as __value] - AggregateExec: mode=Final, gby=[], aggr=[SUM(partsupp.ps_supplycost * partsupp.ps_availqty)] - CoalescePartitionsExec - ShuffleReaderExec(stage_id=10, input_partitioning=Hash([], 4)) +Query Stage #11 (4 -> 1): +SortPreservingMergeExec: [value@1 DESC] + ShuffleReaderExec(stage_id=10, input_partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 4)) diff --git a/testdata/expected-plans/q12.txt b/testdata/expected-plans/q12.txt index 2ba8687..d2f81fb 100644 --- a/testdata/expected-plans/q12.txt +++ b/testdata/expected-plans/q12.txt @@ -2,68 +2,68 @@ DataFusion Logical Plan ======================= Sort: 
lineitem.l_shipmode ASC NULLS LAST - Projection: lineitem.l_shipmode, SUM(CASE WHEN orders.o_orderpriority = Utf8("1-URGENT") OR orders.o_orderpriority = Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END) AS high_line_count, SUM(CASE WHEN orders.o_orderpriority != Utf8("1-URGENT") AND orders.o_orderpriority != Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END) AS low_line_count - Aggregate: groupBy=[[lineitem.l_shipmode]], aggr=[[SUM(CASE WHEN orders.o_orderpriority = Utf8("1-URGENT") OR orders.o_orderpriority = Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END), SUM(CASE WHEN orders.o_orderpriority != Utf8("1-URGENT") AND orders.o_orderpriority != Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END)]] + Projection: lineitem.l_shipmode, sum(CASE WHEN orders.o_orderpriority = Utf8("1-URGENT") OR orders.o_orderpriority = Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END) AS high_line_count, sum(CASE WHEN orders.o_orderpriority != Utf8("1-URGENT") AND orders.o_orderpriority != Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END) AS low_line_count + Aggregate: groupBy=[[lineitem.l_shipmode]], aggr=[[sum(CASE WHEN orders.o_orderpriority = Utf8("1-URGENT") OR orders.o_orderpriority = Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END), sum(CASE WHEN orders.o_orderpriority != Utf8("1-URGENT") AND orders.o_orderpriority != Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END)]] Projection: orders.o_orderpriority, lineitem.l_shipmode Inner Join: orders.o_orderkey = lineitem.l_orderkey TableScan: orders projection=[o_orderkey, o_orderpriority] Projection: lineitem.l_orderkey, lineitem.l_shipmode - Filter: (lineitem.l_shipmode = Utf8("SHIP") OR lineitem.l_shipmode = Utf8("FOB")) AND lineitem.l_commitdate < lineitem.l_receiptdate AND lineitem.l_shipdate < lineitem.l_commitdate AND lineitem.l_receiptdate >= Date32("9131") AND lineitem.l_receiptdate < Date32("9496") - TableScan: lineitem projection=[l_orderkey, l_shipdate, l_commitdate, l_receiptdate, l_shipmode], partial_filters=[lineitem.l_shipmode = 
Utf8("SHIP") OR lineitem.l_shipmode = Utf8("FOB"), lineitem.l_commitdate < lineitem.l_receiptdate, lineitem.l_shipdate < lineitem.l_commitdate, lineitem.l_receiptdate >= Date32("9131"), lineitem.l_receiptdate < Date32("9496")] + Filter: (lineitem.l_shipmode = Utf8("FOB") OR lineitem.l_shipmode = Utf8("SHIP")) AND lineitem.l_receiptdate > lineitem.l_commitdate AND lineitem.l_shipdate < lineitem.l_commitdate AND lineitem.l_receiptdate >= Date32("1995-01-01") AND lineitem.l_receiptdate < Date32("1996-01-01") + TableScan: lineitem projection=[l_orderkey, l_shipdate, l_commitdate, l_receiptdate, l_shipmode], partial_filters=[lineitem.l_shipmode = Utf8("FOB") OR lineitem.l_shipmode = Utf8("SHIP"), lineitem.l_receiptdate > lineitem.l_commitdate, lineitem.l_shipdate < lineitem.l_commitdate, lineitem.l_receiptdate >= Date32("1995-01-01"), lineitem.l_receiptdate < Date32("1996-01-01")] DataFusion Physical Plan ======================== SortPreservingMergeExec: [l_shipmode@0 ASC NULLS LAST] - SortExec: expr=[l_shipmode@0 ASC NULLS LAST] - ProjectionExec: expr=[l_shipmode@0 as l_shipmode, SUM(CASE WHEN orders.o_orderpriority = Utf8("1-URGENT") OR orders.o_orderpriority = Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END)@1 as high_line_count, SUM(CASE WHEN orders.o_orderpriority != Utf8("1-URGENT") AND orders.o_orderpriority != Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END)@2 as low_line_count] - AggregateExec: mode=FinalPartitioned, gby=[l_shipmode@0 as l_shipmode], aggr=[SUM(CASE WHEN orders.o_orderpriority = Utf8("1-URGENT") OR orders.o_orderpriority = Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END), SUM(CASE WHEN orders.o_orderpriority != Utf8("1-URGENT") AND orders.o_orderpriority != Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END)] + SortExec: expr=[l_shipmode@0 ASC NULLS LAST], preserve_partitioning=[true] + ProjectionExec: expr=[l_shipmode@0 as l_shipmode, sum(CASE WHEN orders.o_orderpriority = Utf8("1-URGENT") OR orders.o_orderpriority = Utf8("2-HIGH") THEN Int64(1) ELSE 
Int64(0) END)@1 as high_line_count, sum(CASE WHEN orders.o_orderpriority != Utf8("1-URGENT") AND orders.o_orderpriority != Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END)@2 as low_line_count] + AggregateExec: mode=FinalPartitioned, gby=[l_shipmode@0 as l_shipmode], aggr=[sum(CASE WHEN orders.o_orderpriority = Utf8("1-URGENT") OR orders.o_orderpriority = Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END), sum(CASE WHEN orders.o_orderpriority != Utf8("1-URGENT") AND orders.o_orderpriority != Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END)] CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "l_shipmode", index: 0 }], 4), input_partitions=4 - AggregateExec: mode=Partial, gby=[l_shipmode@1 as l_shipmode], aggr=[SUM(CASE WHEN orders.o_orderpriority = Utf8("1-URGENT") OR orders.o_orderpriority = Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END), SUM(CASE WHEN orders.o_orderpriority != Utf8("1-URGENT") AND orders.o_orderpriority != Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END)] - ProjectionExec: expr=[o_orderpriority@1 as o_orderpriority, l_shipmode@3 as l_shipmode] + RepartitionExec: partitioning=Hash([l_shipmode@0], 4), input_partitions=4 + AggregateExec: mode=Partial, gby=[l_shipmode@1 as l_shipmode], aggr=[sum(CASE WHEN orders.o_orderpriority = Utf8("1-URGENT") OR orders.o_orderpriority = Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END), sum(CASE WHEN orders.o_orderpriority != Utf8("1-URGENT") AND orders.o_orderpriority != Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END)] + ProjectionExec: expr=[o_orderpriority@1 as o_orderpriority, l_shipmode@0 as l_shipmode] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "o_orderkey", index: 0 }, Column { name: "l_orderkey", index: 0 })] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(l_orderkey@0, o_orderkey@0)], projection=[l_shipmode@1, o_orderpriority@3] CoalesceBatchesExec: target_batch_size=8192 - 
RepartitionExec: partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4), input_partitions=4 - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-3.parquet]]}, projection=[o_orderkey, o_orderpriority] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4), input_partitions=4 + RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4 ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_shipmode@4 as l_shipmode] 
CoalesceBatchesExec: target_batch_size=8192 - FilterExec: (l_shipmode@4 = SHIP OR l_shipmode@4 = FOB) AND l_commitdate@2 < l_receiptdate@3 AND l_shipdate@1 < l_commitdate@2 AND l_receiptdate@3 >= 9131 AND l_receiptdate@3 < 9496 - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-3.parquet]]}, predicate=(l_shipmode@14 = SHIP OR l_shipmode@14 = FOB) AND l_commitdate@11 < l_receiptdate@12 AND l_shipdate@10 < l_commitdate@11 AND l_receiptdate@12 >= 9131 AND l_receiptdate@12 < 
9496, pruning_predicate=(l_shipmode_min@0 <= SHIP AND SHIP <= l_shipmode_max@1 OR l_shipmode_min@0 <= FOB AND FOB <= l_shipmode_max@1) AND l_receiptdate_max@2 >= 9131 AND l_receiptdate_min@3 < 9496, projection=[l_orderkey, l_shipdate, l_commitdate, l_receiptdate, l_shipmode] + FilterExec: (l_shipmode@4 = FOB OR l_shipmode@4 = SHIP) AND l_receiptdate@3 > l_commitdate@2 AND l_shipdate@1 < l_commitdate@2 AND l_receiptdate@3 >= 1995-01-01 AND l_receiptdate@3 < 1996-01-01 + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_shipdate, l_commitdate, l_receiptdate, l_shipmode], predicate=(l_shipmode@14 = FOB OR l_shipmode@14 = SHIP) AND l_receiptdate@12 > l_commitdate@11 AND l_shipdate@10 < l_commitdate@11 AND l_receiptdate@12 >= 1995-01-01 AND l_receiptdate@12 < 1996-01-01, pruning_predicate=(CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= FOB AND FOB <= l_shipmode_max@1 END OR CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= SHIP AND SHIP <= l_shipmode_max@1 END) AND CASE WHEN l_receiptdate_null_count@5 = l_receiptdate_row_count@6 THEN false ELSE l_receiptdate_max@4 >= 1995-01-01 END AND CASE WHEN l_receiptdate_null_count@5 = l_receiptdate_row_count@6 THEN false ELSE l_receiptdate_min@7 < 1996-01-01 END, required_guarantees=[l_shipmode in (SHIP, FOB)] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([o_orderkey@0], 4), input_partitions=4 + ParquetExec: file_groups={4 
groups: [[home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_orderpriority] RaySQL Plan =========== Query Stage #0 (4 -> 4): -ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-8.parquet, 
mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-3.parquet]]}, projection=[o_orderkey, o_orderpriority] - -Query Stage #1 (4 -> 4): -ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) +ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_shipmode@4 as l_shipmode] CoalesceBatchesExec: target_batch_size=8192 - FilterExec: (l_shipmode@4 = SHIP OR l_shipmode@4 = FOB) AND l_commitdate@2 < l_receiptdate@3 AND l_shipdate@1 < l_commitdate@2 AND l_receiptdate@3 >= 9131 AND l_receiptdate@3 < 9496 - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-9.parquet, 
mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-3.parquet]]}, predicate=(l_shipmode@14 = SHIP OR l_shipmode@14 = FOB) AND l_commitdate@11 < l_receiptdate@12 AND l_shipdate@10 < l_commitdate@11 AND l_receiptdate@12 >= 9131 AND l_receiptdate@12 < 9496, pruning_predicate=(l_shipmode_min@0 <= SHIP AND SHIP <= l_shipmode_max@1 OR l_shipmode_min@0 <= FOB AND FOB <= l_shipmode_max@1) AND l_receiptdate_max@2 >= 9131 AND l_receiptdate_min@3 < 9496, projection=[l_orderkey, l_shipdate, l_commitdate, l_receiptdate, l_shipmode] + FilterExec: (l_shipmode@4 = FOB OR l_shipmode@4 = SHIP) AND l_receiptdate@3 > l_commitdate@2 AND l_shipdate@1 < l_commitdate@2 AND l_receiptdate@3 >= 1995-01-01 AND l_receiptdate@3 < 1996-01-01 + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_shipdate, l_commitdate, l_receiptdate, l_shipmode], predicate=(l_shipmode@14 = FOB OR l_shipmode@14 = SHIP) AND l_receiptdate@12 > l_commitdate@11 AND l_shipdate@10 < l_commitdate@11 AND l_receiptdate@12 >= 1995-01-01 AND l_receiptdate@12 < 1996-01-01, pruning_predicate=(CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE 
l_shipmode_min@0 <= FOB AND FOB <= l_shipmode_max@1 END OR CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= SHIP AND SHIP <= l_shipmode_max@1 END) AND CASE WHEN l_receiptdate_null_count@5 = l_receiptdate_row_count@6 THEN false ELSE l_receiptdate_max@4 >= 1995-01-01 END AND CASE WHEN l_receiptdate_null_count@5 = l_receiptdate_row_count@6 THEN false ELSE l_receiptdate_min@7 < 1996-01-01 END, required_guarantees=[l_shipmode in (SHIP, FOB)] + +Query Stage #1 (4 -> 4): +ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_orderpriority] Query Stage #2 (4 -> 4): ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "l_shipmode", index: 0 }], 4)) - AggregateExec: mode=Partial, gby=[l_shipmode@1 as l_shipmode], aggr=[SUM(CASE WHEN orders.o_orderpriority = Utf8("1-URGENT") OR orders.o_orderpriority = Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END), SUM(CASE WHEN orders.o_orderpriority != Utf8("1-URGENT") AND orders.o_orderpriority != Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END)] - ProjectionExec: expr=[o_orderpriority@1 as o_orderpriority, l_shipmode@3 as l_shipmode] + AggregateExec: mode=Partial, gby=[l_shipmode@1 as l_shipmode], aggr=[sum(CASE WHEN orders.o_orderpriority = Utf8("1-URGENT") OR orders.o_orderpriority = Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END), sum(CASE WHEN orders.o_orderpriority != Utf8("1-URGENT") AND 
orders.o_orderpriority != Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END)] + ProjectionExec: expr=[o_orderpriority@1 as o_orderpriority, l_shipmode@0 as l_shipmode] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "o_orderkey", index: 0 }, Column { name: "l_orderkey", index: 0 })] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(l_orderkey@0, o_orderkey@0)], projection=[l_shipmode@1, o_orderpriority@3] CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) + ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) + ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) Query Stage #3 (4 -> 4): ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "l_shipmode", index: 0 }], 4)) - SortExec: expr=[l_shipmode@0 ASC NULLS LAST] - ProjectionExec: expr=[l_shipmode@0 as l_shipmode, SUM(CASE WHEN orders.o_orderpriority = Utf8("1-URGENT") OR orders.o_orderpriority = Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END)@1 as high_line_count, SUM(CASE WHEN orders.o_orderpriority != Utf8("1-URGENT") AND orders.o_orderpriority != Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END)@2 as low_line_count] - AggregateExec: mode=FinalPartitioned, gby=[l_shipmode@0 as l_shipmode], aggr=[SUM(CASE WHEN orders.o_orderpriority = Utf8("1-URGENT") OR orders.o_orderpriority = Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END), SUM(CASE WHEN orders.o_orderpriority != Utf8("1-URGENT") AND orders.o_orderpriority != Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END)] + SortExec: expr=[l_shipmode@0 ASC NULLS LAST], preserve_partitioning=[true] + ProjectionExec: expr=[l_shipmode@0 as l_shipmode, 
sum(CASE WHEN orders.o_orderpriority = Utf8("1-URGENT") OR orders.o_orderpriority = Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END)@1 as high_line_count, sum(CASE WHEN orders.o_orderpriority != Utf8("1-URGENT") AND orders.o_orderpriority != Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END)@2 as low_line_count] + AggregateExec: mode=FinalPartitioned, gby=[l_shipmode@0 as l_shipmode], aggr=[sum(CASE WHEN orders.o_orderpriority = Utf8("1-URGENT") OR orders.o_orderpriority = Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END), sum(CASE WHEN orders.o_orderpriority != Utf8("1-URGENT") AND orders.o_orderpriority != Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END)] CoalesceBatchesExec: target_batch_size=8192 ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "l_shipmode", index: 0 }], 4)) diff --git a/testdata/expected-plans/q13.txt b/testdata/expected-plans/q13.txt index 9d2d3ab..aaaea0f 100644 --- a/testdata/expected-plans/q13.txt +++ b/testdata/expected-plans/q13.txt @@ -1,81 +1,74 @@ DataFusion Logical Plan ======================= -Sort: custdist DESC NULLS FIRST, c_count DESC NULLS FIRST - Projection: c_count, COUNT(UInt8(1)) AS custdist - Aggregate: groupBy=[[c_count]], aggr=[[COUNT(UInt8(1))]] - Projection: c_orders.COUNT(orders.o_orderkey) AS c_count - SubqueryAlias: c_orders - Projection: COUNT(orders.o_orderkey) - Aggregate: groupBy=[[customer.c_custkey]], aggr=[[COUNT(orders.o_orderkey)]] - Projection: customer.c_custkey, orders.o_orderkey - Left Join: customer.c_custkey = orders.o_custkey - TableScan: customer projection=[c_custkey] - Projection: orders.o_orderkey, orders.o_custkey - Filter: orders.o_comment NOT LIKE Utf8("%express%requests%") - TableScan: orders projection=[o_orderkey, o_custkey, o_comment], partial_filters=[orders.o_comment NOT LIKE Utf8("%express%requests%")] +Sort: custdist DESC NULLS FIRST, c_orders.c_count DESC NULLS FIRST + Projection: c_orders.c_count, count(*) AS custdist + Aggregate: groupBy=[[c_orders.c_count]], 
aggr=[[count(Int64(1)) AS count(*)]] + SubqueryAlias: c_orders + Projection: count(orders.o_orderkey) AS c_count + Aggregate: groupBy=[[customer.c_custkey]], aggr=[[count(orders.o_orderkey)]] + Projection: customer.c_custkey, orders.o_orderkey + Left Join: customer.c_custkey = orders.o_custkey + TableScan: customer projection=[c_custkey] + Projection: orders.o_orderkey, orders.o_custkey + Filter: orders.o_comment NOT LIKE Utf8("%express%requests%") + TableScan: orders projection=[o_orderkey, o_custkey, o_comment], partial_filters=[orders.o_comment NOT LIKE Utf8("%express%requests%")] DataFusion Physical Plan ======================== SortPreservingMergeExec: [custdist@1 DESC,c_count@0 DESC] - SortExec: expr=[custdist@1 DESC,c_count@0 DESC] - ProjectionExec: expr=[c_count@0 as c_count, COUNT(UInt8(1))@1 as custdist] - AggregateExec: mode=FinalPartitioned, gby=[c_count@0 as c_count], aggr=[COUNT(UInt8(1))] + SortExec: expr=[custdist@1 DESC,c_count@0 DESC], preserve_partitioning=[true] + ProjectionExec: expr=[c_count@0 as c_count, count(*)@1 as custdist] + AggregateExec: mode=FinalPartitioned, gby=[c_count@0 as c_count], aggr=[count(*)] CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "c_count", index: 0 }], 4), input_partitions=4 - AggregateExec: mode=Partial, gby=[c_count@0 as c_count], aggr=[COUNT(UInt8(1))] - ProjectionExec: expr=[COUNT(orders.o_orderkey)@0 as c_count] - ProjectionExec: expr=[COUNT(orders.o_orderkey)@1 as COUNT(orders.o_orderkey)] - AggregateExec: mode=FinalPartitioned, gby=[c_custkey@0 as c_custkey], aggr=[COUNT(orders.o_orderkey)] - AggregateExec: mode=Partial, gby=[c_custkey@0 as c_custkey], aggr=[COUNT(orders.o_orderkey)] - ProjectionExec: expr=[c_custkey@0 as c_custkey, o_orderkey@1 as o_orderkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Left, on=[(Column { name: "c_custkey", index: 0 }, Column { name: "o_custkey", index: 1 })] - 
CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4), input_partitions=4 - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-3.parquet]]}, projection=[c_custkey] + RepartitionExec: partitioning=Hash([c_count@0], 4), input_partitions=4 + AggregateExec: mode=Partial, gby=[c_count@0 as c_count], aggr=[count(*)] + ProjectionExec: expr=[count(orders.o_orderkey)@1 as c_count] + AggregateExec: 
mode=SinglePartitioned, gby=[c_custkey@0 as c_custkey], aggr=[count(orders.o_orderkey)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Left, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, o_orderkey@1] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([c_custkey@0], 4), input_partitions=4 + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:0..3391370], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:3391370..6782740], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:6782740..6784675, home/edmondo/Development/datafusion-ray/data/customer.parquet/part2.parquet:0..3389435], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part2.parquet:3389435..6780803]]}, projection=[c_custkey] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([o_custkey@1], 4), input_partitions=4 + ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "o_custkey", index: 1 }], 4), input_partitions=4 - ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: o_comment@2 NOT LIKE %express%requests% - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-2.parquet, 
mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-3.parquet]]}, predicate=o_comment@8 NOT LIKE %express%requests%, projection=[o_orderkey, o_custkey, o_comment] + FilterExec: o_comment@2 NOT LIKE %express%requests% + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_custkey, o_comment], predicate=o_comment@8 NOT LIKE %express%requests% RaySQL Plan =========== Query Stage #0 (4 -> 4): ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) - ParquetExec: limit=None, partitions={4 groups: 
[[mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-3.parquet]]}, projection=[c_custkey] + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:0..3391370], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:3391370..6782740], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:6782740..6784675, home/edmondo/Development/datafusion-ray/data/customer.parquet/part2.parquet:0..3389435], 
[home/edmondo/Development/datafusion-ray/data/customer.parquet/part2.parquet:3389435..6780803]]}, projection=[c_custkey] Query Stage #1 (4 -> 4): ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "o_custkey", index: 1 }], 4)) ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: o_comment@2 NOT LIKE %express%requests% - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-3.parquet]]}, predicate=o_comment@8 NOT LIKE 
%express%requests%, projection=[o_orderkey, o_custkey, o_comment] + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_custkey, o_comment], predicate=o_comment@8 NOT LIKE %express%requests% Query Stage #2 (4 -> 4): ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "c_count", index: 0 }], 4)) - AggregateExec: mode=Partial, gby=[c_count@0 as c_count], aggr=[COUNT(UInt8(1))] - ProjectionExec: expr=[COUNT(orders.o_orderkey)@0 as c_count] - ProjectionExec: expr=[COUNT(orders.o_orderkey)@1 as COUNT(orders.o_orderkey)] - AggregateExec: mode=FinalPartitioned, gby=[c_custkey@0 as c_custkey], aggr=[COUNT(orders.o_orderkey)] - AggregateExec: mode=Partial, gby=[c_custkey@0 as c_custkey], aggr=[COUNT(orders.o_orderkey)] - ProjectionExec: expr=[c_custkey@0 as c_custkey, o_orderkey@1 as o_orderkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Left, on=[(Column { name: "c_custkey", index: 0 }, Column { name: "o_custkey", index: 1 })] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "o_custkey", index: 1 }], 4)) + AggregateExec: mode=Partial, gby=[c_count@0 as c_count], aggr=[count(*)] + ProjectionExec: expr=[count(orders.o_orderkey)@1 as c_count] + AggregateExec: mode=SinglePartitioned, gby=[c_custkey@0 as c_custkey], 
aggr=[count(orders.o_orderkey)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Left, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, o_orderkey@1] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "o_custkey", index: 1 }], 4)) Query Stage #3 (4 -> 4): ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "c_count", index: 0 }], 4)) - SortExec: expr=[custdist@1 DESC,c_count@0 DESC] - ProjectionExec: expr=[c_count@0 as c_count, COUNT(UInt8(1))@1 as custdist] - AggregateExec: mode=FinalPartitioned, gby=[c_count@0 as c_count], aggr=[COUNT(UInt8(1))] + SortExec: expr=[custdist@1 DESC,c_count@0 DESC], preserve_partitioning=[true] + ProjectionExec: expr=[c_count@0 as c_count, count(*)@1 as custdist] + AggregateExec: mode=FinalPartitioned, gby=[c_count@0 as c_count], aggr=[count(*)] CoalesceBatchesExec: target_batch_size=8192 ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "c_count", index: 0 }], 4)) diff --git a/testdata/expected-plans/q14.txt b/testdata/expected-plans/q14.txt index 54aaed3..aa81525 100644 --- a/testdata/expected-plans/q14.txt +++ b/testdata/expected-plans/q14.txt @@ -1,63 +1,64 @@ DataFusion Logical Plan ======================= -Projection: Float64(100) * CAST(SUM(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END) AS Float64) / CAST(SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS Float64) AS promo_revenue - Aggregate: groupBy=[[]], aggr=[[SUM(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN CAST(lineitem.l_extendedprice AS Decimal128(35, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(35, 
4))CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(35, 4))Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2))CAST(lineitem.l_discount AS Decimal128(23, 2))lineitem.l_discountDecimal128(Some(100),23,2)CAST(lineitem.l_extendedprice AS Decimal128(35, 4))lineitem.l_extendedprice AS lineitem.l_extendedprice * Decimal128(Some(100),23,2) - lineitem.l_discount ELSE Decimal128(Some(0),35,4) END) AS SUM(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END), SUM(CAST(lineitem.l_extendedprice AS Decimal128(35, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(35, 4))CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(35, 4))Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2))CAST(lineitem.l_discount AS Decimal128(23, 2))lineitem.l_discountDecimal128(Some(100),23,2)CAST(lineitem.l_extendedprice AS Decimal128(35, 4))lineitem.l_extendedprice AS lineitem.l_extendedprice * Decimal128(Some(100),23,2) - lineitem.l_discount) AS SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]] - Projection: CAST(lineitem.l_extendedprice AS Decimal128(35, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(35, 4)) AS CAST(lineitem.l_extendedprice AS Decimal128(35, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(35, 4))CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(35, 4))Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2))CAST(lineitem.l_discount AS Decimal128(23, 2))lineitem.l_discountDecimal128(Some(100),23,2)CAST(lineitem.l_extendedprice AS Decimal128(35, 4))lineitem.l_extendedprice, part.p_type +Projection: Float64(100) * CAST(sum(CASE WHEN part.p_type LIKE 
Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END) AS Float64) / CAST(sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS Float64) AS promo_revenue + Aggregate: groupBy=[[]], aggr=[[sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN __common_expr_1 ELSE Decimal128(Some(0),35,4) END) AS sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END), sum(__common_expr_1) AS sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]] + Projection: lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - lineitem.l_discount) AS __common_expr_1, part.p_type Inner Join: lineitem.l_partkey = part.p_partkey Projection: lineitem.l_partkey, lineitem.l_extendedprice, lineitem.l_discount - Filter: lineitem.l_shipdate >= Date32("9162") AND lineitem.l_shipdate < Date32("9190") - TableScan: lineitem projection=[l_partkey, l_extendedprice, l_discount, l_shipdate], partial_filters=[lineitem.l_shipdate >= Date32("9162"), lineitem.l_shipdate < Date32("9190")] + Filter: lineitem.l_shipdate >= Date32("1995-02-01") AND lineitem.l_shipdate < Date32("1995-03-01") + TableScan: lineitem projection=[l_partkey, l_extendedprice, l_discount, l_shipdate], partial_filters=[lineitem.l_shipdate >= Date32("1995-02-01"), lineitem.l_shipdate < Date32("1995-03-01")] TableScan: part projection=[p_partkey, p_type] DataFusion Physical Plan ======================== -ProjectionExec: expr=[100 * CAST(SUM(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END)@0 AS Float64) / CAST(SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@1 AS Float64) as promo_revenue] - AggregateExec: mode=Final, gby=[], aggr=[SUM(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END), SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] 
+ProjectionExec: expr=[100 * CAST(sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END)@0 AS Float64) / CAST(sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@1 AS Float64) as promo_revenue] + AggregateExec: mode=Final, gby=[], aggr=[sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] CoalescePartitionsExec - AggregateExec: mode=Partial, gby=[], aggr=[SUM(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END), SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] - ProjectionExec: expr=[CAST(l_extendedprice@1 AS Decimal128(35, 4)) * CAST(Some(100),23,2 - CAST(l_discount@2 AS Decimal128(23, 2)) AS Decimal128(35, 4)) as CAST(lineitem.l_extendedprice AS Decimal128(35, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(35, 4))CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(35, 4))Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2))CAST(lineitem.l_discount AS Decimal128(23, 2))lineitem.l_discountDecimal128(Some(100),23,2)CAST(lineitem.l_extendedprice AS Decimal128(35, 4))lineitem.l_extendedprice, p_type@4 as p_type] + AggregateExec: mode=Partial, gby=[], aggr=[sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] + ProjectionExec: expr=[l_extendedprice@1 * (Some(1),20,0 - l_discount@2) as __common_expr_2, p_type@0 as p_type] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "l_partkey", index: 0 }, Column { name: "p_partkey", index: 0 })] + 
HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, l_partkey@0)], projection=[p_type@1, l_extendedprice@3, l_discount@4] CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "l_partkey", index: 0 }], 4), input_partitions=4 + RepartitionExec: partitioning=Hash([p_partkey@0], 4), input_partitions=4 + RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 + ParquetExec: file_groups={2 groups: [[home/edmondo/Development/datafusion-ray/data/part.parquet/part1.parquet], [home/edmondo/Development/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_type] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([l_partkey@0], 4), input_partitions=4 ProjectionExec: expr=[l_partkey@0 as l_partkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_shipdate@3 >= 9162 AND l_shipdate@3 < 9190 - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-4.parquet, 
mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-3.parquet]]}, predicate=l_shipdate@10 >= 9162 AND l_shipdate@10 < 9190, pruning_predicate=l_shipdate_max@0 >= 9162 AND l_shipdate_min@1 < 9190, projection=[l_partkey, l_extendedprice, l_discount, l_shipdate] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4), input_partitions=4 - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/part.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/part.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/part.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-9.parquet, 
mnt/bigdata/tpch/sf10-parquet/part.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/part.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-3.parquet]]}, projection=[p_partkey, p_type] + FilterExec: l_shipdate@3 >= 1995-02-01 AND l_shipdate@3 < 1995-03-01 + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_partkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1995-02-01 AND l_shipdate@10 < 1995-03-01, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1995-02-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1995-03-01 END, required_guarantees=[] RaySQL Plan =========== -Query Stage #0 (4 -> 4): -ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "l_partkey", index: 0 }], 4)) - ProjectionExec: expr=[l_partkey@0 as l_partkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_shipdate@3 >= 9162 AND l_shipdate@3 < 9190 - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-10.parquet, 
mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-3.parquet]]}, predicate=l_shipdate@10 >= 9162 AND l_shipdate@10 < 9190, pruning_predicate=l_shipdate_max@0 >= 9162 AND l_shipdate_min@1 < 9190, projection=[l_partkey, l_extendedprice, l_discount, l_shipdate] +Query Stage #0 (2 -> 4): +ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) + ParquetExec: file_groups={2 groups: [[home/edmondo/Development/datafusion-ray/data/part.parquet/part1.parquet], [home/edmondo/Development/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_type] Query Stage #1 (4 -> 4): 
-ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/part.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/part.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/part.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/part.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-3.parquet]]}, projection=[p_partkey, p_type] +ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "l_partkey", index: 0 }], 4)) + ProjectionExec: expr=[l_partkey@0 as l_partkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: l_shipdate@3 >= 1995-02-01 AND l_shipdate@3 < 1995-03-01 + ParquetExec: file_groups={4 groups: 
[[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_partkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1995-02-01 AND l_shipdate@10 < 1995-03-01, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1995-02-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1995-03-01 END, required_guarantees=[] -Query Stage #2 (4 -> 4): +Query Stage #2 (4 -> 1): ShuffleWriterExec(stage_id=2, output_partitioning=Hash([], 4)) - AggregateExec: mode=Partial, gby=[], aggr=[SUM(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END), SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] - ProjectionExec: expr=[CAST(l_extendedprice@1 AS Decimal128(35, 4)) * CAST(Some(100),23,2 - CAST(l_discount@2 AS Decimal128(23, 2)) AS Decimal128(35, 4)) as CAST(lineitem.l_extendedprice AS Decimal128(35, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(35, 4))CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(35, 4))Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2))CAST(lineitem.l_discount AS Decimal128(23, 2))lineitem.l_discountDecimal128(Some(100),23,2)CAST(lineitem.l_extendedprice AS Decimal128(35, 4))lineitem.l_extendedprice, p_type@4 as p_type] + AggregateExec: mode=Partial, gby=[], aggr=[sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") 
THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] + ProjectionExec: expr=[l_extendedprice@1 * (Some(1),20,0 - l_discount@2) as __common_expr_2, p_type@0 as p_type] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "l_partkey", index: 0 }, Column { name: "p_partkey", index: 0 })] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, l_partkey@0)], projection=[p_type@1, l_extendedprice@3, l_discount@4] CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "l_partkey", index: 0 }], 4)) + ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) + ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "l_partkey", index: 0 }], 4)) Query Stage #3 (1 -> 1): -ProjectionExec: expr=[100 * CAST(SUM(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END)@0 AS Float64) / CAST(SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@1 AS Float64) as promo_revenue] - AggregateExec: mode=Final, gby=[], aggr=[SUM(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END), SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] +ProjectionExec: expr=[100 * CAST(sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END)@0 AS Float64) / CAST(sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@1 AS Float64) as promo_revenue] + AggregateExec: mode=Final, gby=[], aggr=[sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN 
lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] CoalescePartitionsExec ShuffleReaderExec(stage_id=2, input_partitioning=Hash([], 4)) diff --git a/testdata/expected-plans/q16.txt b/testdata/expected-plans/q16.txt index d40a22c..4c4b603 100644 --- a/testdata/expected-plans/q16.txt +++ b/testdata/expected-plans/q16.txt @@ -2,9 +2,9 @@ DataFusion Logical Plan ======================= Sort: supplier_cnt DESC NULLS FIRST, part.p_brand ASC NULLS LAST, part.p_type ASC NULLS LAST, part.p_size ASC NULLS LAST - Projection: group_alias_0 AS part.p_brand, group_alias_1 AS part.p_type, group_alias_2 AS part.p_size, COUNT(alias1) AS supplier_cnt - Aggregate: groupBy=[[group_alias_0, group_alias_1, group_alias_2]], aggr=[[COUNT(alias1)]] - Aggregate: groupBy=[[part.p_brand AS group_alias_0, part.p_type AS group_alias_1, part.p_size AS group_alias_2, partsupp.ps_suppkey AS alias1]], aggr=[[]] + Projection: part.p_brand, part.p_type, part.p_size, count(alias1) AS supplier_cnt + Aggregate: groupBy=[[part.p_brand, part.p_type, part.p_size]], aggr=[[count(alias1)]] + Aggregate: groupBy=[[part.p_brand, part.p_type, part.p_size, partsupp.ps_suppkey AS alias1]], aggr=[[]] LeftAnti Join: partsupp.ps_suppkey = __correlated_sq_1.s_suppkey Projection: partsupp.ps_suppkey, part.p_brand, part.p_type, part.p_size Inner Join: partsupp.ps_partkey = part.p_partkey @@ -12,7 +12,7 @@ Sort: supplier_cnt DESC NULLS FIRST, part.p_brand ASC NULLS LAST, part.p_type AS Filter: part.p_brand != Utf8("Brand#14") AND part.p_type NOT LIKE Utf8("SMALL PLATED%") AND part.p_size IN ([Int32(14), Int32(6), Int32(5), Int32(31), Int32(49), Int32(15), Int32(41), Int32(47)]) TableScan: part projection=[p_partkey, p_brand, p_type, p_size], partial_filters=[part.p_brand != Utf8("Brand#14"), part.p_type NOT LIKE Utf8("SMALL PLATED%"), part.p_size IN ([Int32(14), Int32(6), Int32(5), Int32(31), Int32(49), Int32(15), Int32(41), 
Int32(47)])] SubqueryAlias: __correlated_sq_1 - Projection: supplier.s_suppkey AS s_suppkey + Projection: supplier.s_suppkey Filter: supplier.s_comment LIKE Utf8("%Customer%Complaints%") TableScan: supplier projection=[s_suppkey, s_comment], partial_filters=[supplier.s_comment LIKE Utf8("%Customer%Complaints%")] @@ -20,94 +20,96 @@ DataFusion Physical Plan ======================== SortPreservingMergeExec: [supplier_cnt@3 DESC,p_brand@0 ASC NULLS LAST,p_type@1 ASC NULLS LAST,p_size@2 ASC NULLS LAST] - SortExec: expr=[supplier_cnt@3 DESC,p_brand@0 ASC NULLS LAST,p_type@1 ASC NULLS LAST,p_size@2 ASC NULLS LAST] - ProjectionExec: expr=[group_alias_0@0 as part.p_brand, group_alias_1@1 as part.p_type, group_alias_2@2 as part.p_size, COUNT(alias1)@3 as supplier_cnt] - AggregateExec: mode=FinalPartitioned, gby=[group_alias_0@0 as group_alias_0, group_alias_1@1 as group_alias_1, group_alias_2@2 as group_alias_2], aggr=[COUNT(alias1)] + SortExec: expr=[supplier_cnt@3 DESC,p_brand@0 ASC NULLS LAST,p_type@1 ASC NULLS LAST,p_size@2 ASC NULLS LAST], preserve_partitioning=[true] + ProjectionExec: expr=[p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size, count(alias1)@3 as supplier_cnt] + AggregateExec: mode=FinalPartitioned, gby=[p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size], aggr=[count(alias1)] CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "group_alias_0", index: 0 }, Column { name: "group_alias_1", index: 1 }, Column { name: "group_alias_2", index: 2 }], 4), input_partitions=4 - AggregateExec: mode=Partial, gby=[group_alias_0@0 as group_alias_0, group_alias_1@1 as group_alias_1, group_alias_2@2 as group_alias_2], aggr=[COUNT(alias1)] - AggregateExec: mode=FinalPartitioned, gby=[group_alias_0@0 as group_alias_0, group_alias_1@1 as group_alias_1, group_alias_2@2 as group_alias_2, alias1@3 as alias1], aggr=[] + RepartitionExec: partitioning=Hash([p_brand@0, p_type@1, p_size@2], 4), input_partitions=4 + 
AggregateExec: mode=Partial, gby=[p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size], aggr=[count(alias1)] + AggregateExec: mode=FinalPartitioned, gby=[p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size, alias1@3 as alias1], aggr=[] CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "group_alias_0", index: 0 }, Column { name: "group_alias_1", index: 1 }, Column { name: "group_alias_2", index: 2 }, Column { name: "alias1", index: 3 }], 4), input_partitions=4 - AggregateExec: mode=Partial, gby=[p_brand@1 as group_alias_0, p_type@2 as group_alias_1, p_size@3 as group_alias_2, ps_suppkey@0 as alias1], aggr=[] + RepartitionExec: partitioning=Hash([p_brand@0, p_type@1, p_size@2, alias1@3], 4), input_partitions=4 + AggregateExec: mode=Partial, gby=[p_brand@1 as p_brand, p_type@2 as p_type, p_size@3 as p_size, ps_suppkey@0 as alias1], aggr=[] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=LeftAnti, on=[(Column { name: "ps_suppkey", index: 0 }, Column { name: "s_suppkey", index: 0 })] + HashJoinExec: mode=Partitioned, join_type=RightAnti, on=[(s_suppkey@0, ps_suppkey@0)] CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "ps_suppkey", index: 0 }], 4), input_partitions=4 - ProjectionExec: expr=[ps_suppkey@1 as ps_suppkey, p_brand@3 as p_brand, p_type@4 as p_type, p_size@5 as p_size] + RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=4 + ProjectionExec: expr=[s_suppkey@0 as s_suppkey] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "ps_partkey", index: 0 }, Column { name: "p_partkey", index: 0 })] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 4), input_partitions=4 - ParquetExec: limit=None, partitions={4 groups: 
[[mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-3.parquet]]}, projection=[ps_partkey, ps_suppkey] + FilterExec: s_comment@1 LIKE %Customer%Complaints% + RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 + ParquetExec: file_groups={2 groups: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet/part1.parquet], [home/edmondo/Development/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_comment], predicate=s_comment@6 LIKE %Customer%Complaints% + 
CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([ps_suppkey@0], 4), input_partitions=4 + ProjectionExec: expr=[ps_suppkey@3 as ps_suppkey, p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, ps_partkey@0)], projection=[p_brand@1, p_type@2, p_size@3, ps_suppkey@5] CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4), input_partitions=4 + RepartitionExec: partitioning=Hash([p_partkey@0], 4), input_partitions=4 CoalesceBatchesExec: target_batch_size=8192 FilterExec: p_brand@1 != Brand#14 AND p_type@2 NOT LIKE SMALL PLATED% AND Use p_size@3 IN (SET) ([Literal { value: Int32(14) }, Literal { value: Int32(6) }, Literal { value: Int32(5) }, Literal { value: Int32(31) }, Literal { value: Int32(49) }, Literal { value: Int32(15) }, Literal { value: Int32(41) }, Literal { value: Int32(47) }]) - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/part.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/part.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/part.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-4.parquet, 
mnt/bigdata/tpch/sf10-parquet/part.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/part.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-3.parquet]]}, predicate=p_brand@3 != Brand#14 AND p_type@4 NOT LIKE SMALL PLATED% AND Use p_size@5 IN (SET) ([Literal { value: Int32(14) }, Literal { value: Int32(6) }, Literal { value: Int32(5) }, Literal { value: Int32(31) }, Literal { value: Int32(49) }, Literal { value: Int32(15) }, Literal { value: Int32(41) }, Literal { value: Int32(47) }]), pruning_predicate=(p_brand_min@0 != Brand#14 OR Brand#14 != p_brand_max@1) AND (p_size_min@2 <= 14 AND 14 <= p_size_max@3 OR p_size_min@2 <= 6 AND 6 <= p_size_max@3 OR p_size_min@2 <= 5 AND 5 <= p_size_max@3 OR p_size_min@2 <= 31 AND 31 <= p_size_max@3 OR p_size_min@2 <= 49 AND 49 <= p_size_max@3 OR p_size_min@2 <= 15 AND 15 <= p_size_max@3 OR p_size_min@2 <= 41 AND 41 <= p_size_max@3 OR p_size_min@2 <= 47 AND 47 <= p_size_max@3), projection=[p_partkey, p_brand, p_type, p_size] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4), input_partitions=4 - ProjectionExec: expr=[s_suppkey@0 as s_suppkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: s_comment@1 LIKE %Customer%Complaints% - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-13.parquet, 
mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-3.parquet]]}, predicate=s_comment@6 LIKE %Customer%Complaints%, projection=[s_suppkey, s_comment] + RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 + ParquetExec: file_groups={2 groups: [[home/edmondo/Development/datafusion-ray/data/part.parquet/part1.parquet], [home/edmondo/Development/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_brand, p_type, p_size], predicate=p_brand@3 != Brand#14 AND p_type@4 NOT LIKE SMALL PLATED% AND Use p_size@5 IN (SET) ([Literal { value: Int32(14) }, Literal { value: Int32(6) }, Literal { value: Int32(5) }, Literal { value: Int32(31) }, Literal { value: Int32(49) }, Literal { value: Int32(15) }, Literal { value: Int32(41) }, Literal { value: 
Int32(47) }]), pruning_predicate=CASE WHEN p_brand_null_count@2 = p_brand_row_count@3 THEN false ELSE p_brand_min@0 != Brand#14 OR Brand#14 != p_brand_max@1 END AND (CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 14 AND 14 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 6 AND 6 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 5 AND 5 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 31 AND 31 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 49 AND 49 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 15 AND 15 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 41 AND 41 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 47 AND 47 <= p_size_max@5 END), required_guarantees=[p_brand not in (Brand#14), p_size in (41, 14, 49, 6, 31, 15, 5, 47)] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([ps_partkey@0], 4), input_partitions=4 + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10449977], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part1.parquet:10449977..20896603, home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:0..3351], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:3351..10453328], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:10453328..20903305]]}, projection=[ps_partkey, ps_suppkey] RaySQL Plan =========== -Query Stage #0 (4 -> 4): -ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 4)) - 
ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-3.parquet]]}, projection=[ps_partkey, ps_suppkey] +Query Stage #0 (2 -> 4): +ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) + ProjectionExec: expr=[s_suppkey@0 as s_suppkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: s_comment@1 LIKE %Customer%Complaints% + ParquetExec: file_groups={2 groups: 
[[home/edmondo/Development/datafusion-ray/data/supplier.parquet/part1.parquet], [home/edmondo/Development/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_comment], predicate=s_comment@6 LIKE %Customer%Complaints% -Query Stage #1 (4 -> 4): +Query Stage #1 (2 -> 4): ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) CoalesceBatchesExec: target_batch_size=8192 FilterExec: p_brand@1 != Brand#14 AND p_type@2 NOT LIKE SMALL PLATED% AND Use p_size@3 IN (SET) ([Literal { value: Int32(14) }, Literal { value: Int32(6) }, Literal { value: Int32(5) }, Literal { value: Int32(31) }, Literal { value: Int32(49) }, Literal { value: Int32(15) }, Literal { value: Int32(41) }, Literal { value: Int32(47) }]) - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/part.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/part.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/part.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/part.parquet/part-18.parquet, 
mnt/bigdata/tpch/sf10-parquet/part.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-3.parquet]]}, predicate=p_brand@3 != Brand#14 AND p_type@4 NOT LIKE SMALL PLATED% AND Use p_size@5 IN (SET) ([Literal { value: Int32(14) }, Literal { value: Int32(6) }, Literal { value: Int32(5) }, Literal { value: Int32(31) }, Literal { value: Int32(49) }, Literal { value: Int32(15) }, Literal { value: Int32(41) }, Literal { value: Int32(47) }]), pruning_predicate=(p_brand_min@0 != Brand#14 OR Brand#14 != p_brand_max@1) AND (p_size_min@2 <= 14 AND 14 <= p_size_max@3 OR p_size_min@2 <= 6 AND 6 <= p_size_max@3 OR p_size_min@2 <= 5 AND 5 <= p_size_max@3 OR p_size_min@2 <= 31 AND 31 <= p_size_max@3 OR p_size_min@2 <= 49 AND 49 <= p_size_max@3 OR p_size_min@2 <= 15 AND 15 <= p_size_max@3 OR p_size_min@2 <= 41 AND 41 <= p_size_max@3 OR p_size_min@2 <= 47 AND 47 <= p_size_max@3), projection=[p_partkey, p_brand, p_type, p_size] + ParquetExec: file_groups={2 groups: [[home/edmondo/Development/datafusion-ray/data/part.parquet/part1.parquet], [home/edmondo/Development/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_brand, p_type, p_size], predicate=p_brand@3 != Brand#14 AND p_type@4 NOT LIKE SMALL PLATED% AND Use p_size@5 IN (SET) ([Literal { value: Int32(14) }, Literal { value: Int32(6) }, Literal { value: Int32(5) }, Literal { value: Int32(31) }, Literal { value: Int32(49) }, Literal { value: Int32(15) }, Literal { value: Int32(41) }, Literal { value: Int32(47) }]), pruning_predicate=CASE WHEN p_brand_null_count@2 = p_brand_row_count@3 THEN false ELSE p_brand_min@0 != Brand#14 OR Brand#14 != p_brand_max@1 END AND (CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 14 AND 14 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = 
p_size_row_count@7 THEN false ELSE p_size_min@4 <= 6 AND 6 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 5 AND 5 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 31 AND 31 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 49 AND 49 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 15 AND 15 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 41 AND 41 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 47 AND 47 <= p_size_max@5 END), required_guarantees=[p_brand not in (Brand#14), p_size in (41, 14, 49, 6, 31, 15, 5, 47)] Query Stage #2 (4 -> 4): -ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "ps_suppkey", index: 0 }], 4)) - ProjectionExec: expr=[ps_suppkey@1 as ps_suppkey, p_brand@3 as p_brand, p_type@4 as p_type, p_size@5 as p_size] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "ps_partkey", index: 0 }, Column { name: "p_partkey", index: 0 })] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) +ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 4)) + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10449977], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part1.parquet:10449977..20896603, 
home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:0..3351], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:3351..10453328], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:10453328..20903305]]}, projection=[ps_partkey, ps_suppkey] Query Stage #3 (4 -> 4): -ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) - ProjectionExec: expr=[s_suppkey@0 as s_suppkey] +ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "ps_suppkey", index: 0 }], 4)) + ProjectionExec: expr=[ps_suppkey@3 as ps_suppkey, p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size] CoalesceBatchesExec: target_batch_size=8192 - FilterExec: s_comment@1 LIKE %Customer%Complaints% - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-9.parquet, 
mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-3.parquet]]}, predicate=s_comment@6 LIKE %Customer%Complaints%, projection=[s_suppkey, s_comment] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, ps_partkey@0)], projection=[p_brand@1, p_type@2, p_size@3, ps_suppkey@5] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 4)) Query Stage #4 (4 -> 4): -ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "group_alias_0", index: 0 }, Column { name: "group_alias_1", index: 1 }, Column { name: "group_alias_2", index: 2 }, Column { name: "alias1", index: 3 }], 4)) - AggregateExec: mode=Partial, gby=[p_brand@1 as group_alias_0, p_type@2 as group_alias_1, p_size@3 as group_alias_2, ps_suppkey@0 as alias1], aggr=[] +ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "p_brand", index: 0 }, Column { name: "p_type", index: 1 }, Column { name: "p_size", index: 2 }, Column { name: "alias1", index: 3 }], 4)) + AggregateExec: mode=Partial, gby=[p_brand@1 as p_brand, p_type@2 as p_type, p_size@3 as p_size, ps_suppkey@0 as alias1], aggr=[] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=LeftAnti, on=[(Column { name: "ps_suppkey", index: 0 }, Column { name: "s_suppkey", index: 0 })] + HashJoinExec: mode=Partitioned, join_type=RightAnti, on=[(s_suppkey@0, ps_suppkey@0)] 
CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "ps_suppkey", index: 0 }], 4)) + ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) + ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "ps_suppkey", index: 0 }], 4)) Query Stage #5 (4 -> 4): -ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "group_alias_0", index: 0 }, Column { name: "group_alias_1", index: 1 }, Column { name: "group_alias_2", index: 2 }], 4)) - AggregateExec: mode=Partial, gby=[group_alias_0@0 as group_alias_0, group_alias_1@1 as group_alias_1, group_alias_2@2 as group_alias_2], aggr=[COUNT(alias1)] - AggregateExec: mode=FinalPartitioned, gby=[group_alias_0@0 as group_alias_0, group_alias_1@1 as group_alias_1, group_alias_2@2 as group_alias_2, alias1@3 as alias1], aggr=[] +ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "p_brand", index: 0 }, Column { name: "p_type", index: 1 }, Column { name: "p_size", index: 2 }], 4)) + AggregateExec: mode=Partial, gby=[p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size], aggr=[count(alias1)] + AggregateExec: mode=FinalPartitioned, gby=[p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size, alias1@3 as alias1], aggr=[] CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=4, input_partitioning=Hash([Column { name: "group_alias_0", index: 0 }, Column { name: "group_alias_1", index: 1 }, Column { name: "group_alias_2", index: 2 }, Column { name: "alias1", index: 3 }], 4)) + ShuffleReaderExec(stage_id=4, input_partitioning=Hash([Column { name: "p_brand", index: 0 }, Column { name: "p_type", index: 1 }, Column { name: "p_size", index: 2 }, Column { name: "alias1", index: 3 }], 4)) Query Stage #6 (4 -> 4): 
-ShuffleWriterExec(stage_id=6, output_partitioning=Hash([Column { name: "part.p_brand", index: 0 }, Column { name: "part.p_type", index: 1 }, Column { name: "part.p_size", index: 2 }], 4)) - SortExec: expr=[supplier_cnt@3 DESC,p_brand@0 ASC NULLS LAST,p_type@1 ASC NULLS LAST,p_size@2 ASC NULLS LAST] - ProjectionExec: expr=[group_alias_0@0 as part.p_brand, group_alias_1@1 as part.p_type, group_alias_2@2 as part.p_size, COUNT(alias1)@3 as supplier_cnt] - AggregateExec: mode=FinalPartitioned, gby=[group_alias_0@0 as group_alias_0, group_alias_1@1 as group_alias_1, group_alias_2@2 as group_alias_2], aggr=[COUNT(alias1)] +ShuffleWriterExec(stage_id=6, output_partitioning=Hash([Column { name: "p_brand", index: 0 }, Column { name: "p_type", index: 1 }, Column { name: "p_size", index: 2 }], 4)) + SortExec: expr=[supplier_cnt@3 DESC,p_brand@0 ASC NULLS LAST,p_type@1 ASC NULLS LAST,p_size@2 ASC NULLS LAST], preserve_partitioning=[true] + ProjectionExec: expr=[p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size, count(alias1)@3 as supplier_cnt] + AggregateExec: mode=FinalPartitioned, gby=[p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size], aggr=[count(alias1)] CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=5, input_partitioning=Hash([Column { name: "group_alias_0", index: 0 }, Column { name: "group_alias_1", index: 1 }, Column { name: "group_alias_2", index: 2 }], 4)) + ShuffleReaderExec(stage_id=5, input_partitioning=Hash([Column { name: "p_brand", index: 0 }, Column { name: "p_type", index: 1 }, Column { name: "p_size", index: 2 }], 4)) Query Stage #7 (4 -> 1): SortPreservingMergeExec: [supplier_cnt@3 DESC,p_brand@0 ASC NULLS LAST,p_type@1 ASC NULLS LAST,p_size@2 ASC NULLS LAST] - ShuffleReaderExec(stage_id=6, input_partitioning=Hash([Column { name: "part.p_brand", index: 0 }, Column { name: "part.p_type", index: 1 }, Column { name: "part.p_size", index: 2 }], 4)) + ShuffleReaderExec(stage_id=6, input_partitioning=Hash([Column { 
name: "p_brand", index: 0 }, Column { name: "p_type", index: 1 }, Column { name: "p_size", index: 2 }], 4)) diff --git a/testdata/expected-plans/q17.txt b/testdata/expected-plans/q17.txt index 3604601..9c52300 100644 --- a/testdata/expected-plans/q17.txt +++ b/testdata/expected-plans/q17.txt @@ -1,100 +1,89 @@ DataFusion Logical Plan ======================= -Projection: CAST(SUM(lineitem.l_extendedprice) AS Float64) / Float64(7) AS avg_yearly - Aggregate: groupBy=[[]], aggr=[[SUM(lineitem.l_extendedprice)]] +Projection: CAST(sum(lineitem.l_extendedprice) AS Float64) / Float64(7) AS avg_yearly + Aggregate: groupBy=[[]], aggr=[[sum(lineitem.l_extendedprice)]] Projection: lineitem.l_extendedprice - Filter: CAST(lineitem.l_quantity AS Decimal128(30, 15)) < CAST(__scalar_sq_1.__value AS Decimal128(30, 15)) AND __scalar_sq_1.l_partkey = lineitem.l_partkey - Projection: lineitem.l_partkey, lineitem.l_quantity, lineitem.l_extendedprice, __scalar_sq_1.l_partkey, __scalar_sq_1.__value - Inner Join: part.p_partkey = __scalar_sq_1.l_partkey - Filter: part.p_partkey = lineitem.l_partkey AND lineitem.l_partkey = part.p_partkey - Inner Join: lineitem.l_partkey = part.p_partkey - TableScan: lineitem projection=[l_partkey, l_quantity, l_extendedprice] - Projection: part.p_partkey - Filter: part.p_brand = Utf8("Brand#42") AND part.p_container = Utf8("LG BAG") - TableScan: part projection=[p_partkey, p_brand, p_container], partial_filters=[part.p_brand = Utf8("Brand#42"), part.p_container = Utf8("LG BAG")] - SubqueryAlias: __scalar_sq_1 - Projection: lineitem.l_partkey, Float64(0.2) * CAST(AVG(lineitem.l_quantity) AS Float64) AS __value - Aggregate: groupBy=[[lineitem.l_partkey]], aggr=[[AVG(lineitem.l_quantity)]] - TableScan: lineitem projection=[l_partkey, l_quantity] + Inner Join: part.p_partkey = __scalar_sq_1.l_partkey Filter: CAST(lineitem.l_quantity AS Decimal128(30, 15)) < __scalar_sq_1.Float64(0.2) * avg(lineitem.l_quantity) + Projection: lineitem.l_quantity, 
lineitem.l_extendedprice, part.p_partkey + Inner Join: lineitem.l_partkey = part.p_partkey + TableScan: lineitem projection=[l_partkey, l_quantity, l_extendedprice] + Projection: part.p_partkey + Filter: part.p_brand = Utf8("Brand#42") AND part.p_container = Utf8("LG BAG") + TableScan: part projection=[p_partkey, p_brand, p_container], partial_filters=[part.p_brand = Utf8("Brand#42"), part.p_container = Utf8("LG BAG")] + SubqueryAlias: __scalar_sq_1 + Projection: CAST(Float64(0.2) * CAST(avg(lineitem.l_quantity) AS Float64) AS Decimal128(30, 15)), lineitem.l_partkey + Aggregate: groupBy=[[lineitem.l_partkey]], aggr=[[avg(lineitem.l_quantity)]] + TableScan: lineitem projection=[l_partkey, l_quantity] DataFusion Physical Plan ======================== -ProjectionExec: expr=[CAST(SUM(lineitem.l_extendedprice)@0 AS Float64) / 7 as avg_yearly] - AggregateExec: mode=Final, gby=[], aggr=[SUM(lineitem.l_extendedprice)] +ProjectionExec: expr=[CAST(sum(lineitem.l_extendedprice)@0 AS Float64) / 7 as avg_yearly] + AggregateExec: mode=Final, gby=[], aggr=[sum(lineitem.l_extendedprice)] CoalescePartitionsExec - AggregateExec: mode=Partial, gby=[], aggr=[SUM(lineitem.l_extendedprice)] - ProjectionExec: expr=[l_extendedprice@2 as l_extendedprice] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: CAST(l_quantity@1 AS Decimal128(30, 15)) < CAST(__value@4 AS Decimal128(30, 15)) AND l_partkey@3 = l_partkey@0 - ProjectionExec: expr=[l_partkey@0 as l_partkey, l_quantity@1 as l_quantity, l_extendedprice@2 as l_extendedprice, l_partkey@4 as l_partkey, __value@5 as __value] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "p_partkey", index: 3 }, Column { name: "l_partkey", index: 0 })] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: p_partkey@3 = l_partkey@0 AND l_partkey@0 = p_partkey@3 + AggregateExec: mode=Partial, gby=[], aggr=[sum(lineitem.l_extendedprice)] + CoalesceBatchesExec: 
target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@2, l_partkey@1)], filter=CAST(l_quantity@0 AS Decimal128(30, 15)) < Float64(0.2) * avg(lineitem.l_quantity)@1, projection=[l_extendedprice@1] + ProjectionExec: expr=[l_quantity@1 as l_quantity, l_extendedprice@2 as l_extendedprice, p_partkey@0 as p_partkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, l_partkey@0)], projection=[p_partkey@0, l_quantity@2, l_extendedprice@3] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([p_partkey@0], 4), input_partitions=4 + ProjectionExec: expr=[p_partkey@0 as p_partkey] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "l_partkey", index: 0 }, Column { name: "p_partkey", index: 0 })] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "l_partkey", index: 0 }], 4), input_partitions=4 - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-23.parquet, 
mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-3.parquet]]}, projection=[l_partkey, l_quantity, l_extendedprice] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4), input_partitions=4 - ProjectionExec: expr=[p_partkey@0 as p_partkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: p_brand@1 = Brand#42 AND p_container@2 = LG BAG - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/part.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/part.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/part.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-4.parquet, 
mnt/bigdata/tpch/sf10-parquet/part.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/part.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-3.parquet]]}, predicate=p_brand@3 = Brand#42 AND p_container@6 = LG BAG, pruning_predicate=p_brand_min@0 <= Brand#42 AND Brand#42 <= p_brand_max@1 AND p_container_min@2 <= LG BAG AND LG BAG <= p_container_max@3, projection=[p_partkey, p_brand, p_container] - ProjectionExec: expr=[l_partkey@0 as l_partkey, 0.2 * CAST(AVG(lineitem.l_quantity)@1 AS Float64) as __value] - AggregateExec: mode=FinalPartitioned, gby=[l_partkey@0 as l_partkey], aggr=[AVG(lineitem.l_quantity)] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "l_partkey", index: 0 }], 4), input_partitions=4 - AggregateExec: mode=Partial, gby=[l_partkey@0 as l_partkey], aggr=[AVG(lineitem.l_quantity)] - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-0.parquet, 
mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-3.parquet]]}, projection=[l_partkey, l_quantity] + FilterExec: p_brand@1 = Brand#42 AND p_container@2 = LG BAG + RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 + ParquetExec: file_groups={2 groups: [[home/edmondo/Development/datafusion-ray/data/part.parquet/part1.parquet], [home/edmondo/Development/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_brand, p_container], predicate=p_brand@3 = Brand#42 AND p_container@6 = LG BAG, pruning_predicate=CASE WHEN p_brand_null_count@2 = p_brand_row_count@3 THEN false ELSE p_brand_min@0 <= Brand#42 AND Brand#42 <= p_brand_max@1 END AND CASE WHEN p_container_null_count@6 = p_container_row_count@7 THEN false ELSE p_container_min@4 <= LG BAG AND LG BAG <= p_container_max@5 END, required_guarantees=[p_brand in (Brand#42), p_container in (LG BAG)] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([l_partkey@0], 4), input_partitions=4 + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], 
[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_partkey, l_quantity, l_extendedprice] + ProjectionExec: expr=[CAST(0.2 * CAST(avg(lineitem.l_quantity)@1 AS Float64) AS Decimal128(30, 15)) as Float64(0.2) * avg(lineitem.l_quantity), l_partkey@0 as l_partkey] + AggregateExec: mode=FinalPartitioned, gby=[l_partkey@0 as l_partkey], aggr=[avg(lineitem.l_quantity)] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([l_partkey@0], 4), input_partitions=4 + AggregateExec: mode=Partial, gby=[l_partkey@0 as l_partkey], aggr=[avg(lineitem.l_quantity)] + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_partkey, l_quantity] RaySQL Plan =========== -Query Stage #0 (4 -> 4): -ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "l_partkey", index: 0 }], 4)) - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-6.parquet, 
mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-3.parquet]]}, projection=[l_partkey, l_quantity, l_extendedprice] - -Query Stage #1 (4 -> 4): -ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) +Query Stage #0 (2 -> 4): +ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) ProjectionExec: expr=[p_partkey@0 as p_partkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: p_brand@1 = Brand#42 AND p_container@2 = LG BAG - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/part.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-13.parquet, 
mnt/bigdata/tpch/sf10-parquet/part.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/part.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/part.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/part.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-3.parquet]]}, predicate=p_brand@3 = Brand#42 AND p_container@6 = LG BAG, pruning_predicate=p_brand_min@0 <= Brand#42 AND Brand#42 <= p_brand_max@1 AND p_container_min@2 <= LG BAG AND LG BAG <= p_container_max@3, projection=[p_partkey, p_brand, p_container] + ParquetExec: file_groups={2 groups: [[home/edmondo/Development/datafusion-ray/data/part.parquet/part1.parquet], [home/edmondo/Development/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_brand, p_container], predicate=p_brand@3 = Brand#42 AND p_container@6 = LG BAG, pruning_predicate=CASE WHEN p_brand_null_count@2 = p_brand_row_count@3 THEN false ELSE p_brand_min@0 <= Brand#42 AND Brand#42 <= p_brand_max@1 END AND CASE WHEN p_container_null_count@6 = p_container_row_count@7 THEN false ELSE p_container_min@4 <= LG BAG AND 
LG BAG <= p_container_max@5 END, required_guarantees=[p_brand in (Brand#42), p_container in (LG BAG)] + +Query Stage #1 (4 -> 4): +ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "l_partkey", index: 0 }], 4)) + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_partkey, l_quantity, l_extendedprice] Query Stage #2 (4 -> 4): ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "l_partkey", index: 0 }], 4)) - AggregateExec: mode=Partial, gby=[l_partkey@0 as l_partkey], aggr=[AVG(lineitem.l_quantity)] - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-23.parquet, 
mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-3.parquet]]}, projection=[l_partkey, l_quantity] + AggregateExec: mode=Partial, gby=[l_partkey@0 as l_partkey], aggr=[avg(lineitem.l_quantity)] + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_partkey, l_quantity] -Query Stage #3 (4 -> 4): +Query Stage #3 (4 -> 1): ShuffleWriterExec(stage_id=3, output_partitioning=Hash([], 4)) - AggregateExec: mode=Partial, gby=[], aggr=[SUM(lineitem.l_extendedprice)] - ProjectionExec: expr=[l_extendedprice@2 as l_extendedprice] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: CAST(l_quantity@1 AS Decimal128(30, 15)) < CAST(__value@4 AS Decimal128(30, 15)) AND l_partkey@3 = l_partkey@0 - ProjectionExec: expr=[l_partkey@0 as l_partkey, l_quantity@1 as l_quantity, l_extendedprice@2 as l_extendedprice, l_partkey@4 as l_partkey, __value@5 as __value] + AggregateExec: mode=Partial, gby=[], aggr=[sum(lineitem.l_extendedprice)] + 
CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@2, l_partkey@1)], filter=CAST(l_quantity@0 AS Decimal128(30, 15)) < Float64(0.2) * avg(lineitem.l_quantity)@1, projection=[l_extendedprice@1] + ProjectionExec: expr=[l_quantity@1 as l_quantity, l_extendedprice@2 as l_extendedprice, p_partkey@0 as p_partkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, l_partkey@0)], projection=[p_partkey@0, l_quantity@2, l_extendedprice@3] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "l_partkey", index: 0 }], 4)) + ProjectionExec: expr=[CAST(0.2 * CAST(avg(lineitem.l_quantity)@1 AS Float64) AS Decimal128(30, 15)) as Float64(0.2) * avg(lineitem.l_quantity), l_partkey@0 as l_partkey] + AggregateExec: mode=FinalPartitioned, gby=[l_partkey@0 as l_partkey], aggr=[avg(lineitem.l_quantity)] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "p_partkey", index: 3 }, Column { name: "l_partkey", index: 0 })] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: p_partkey@3 = l_partkey@0 AND l_partkey@0 = p_partkey@3 - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "l_partkey", index: 0 }, Column { name: "p_partkey", index: 0 })] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "l_partkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) - ProjectionExec: expr=[l_partkey@0 as l_partkey, 0.2 * CAST(AVG(lineitem.l_quantity)@1 AS 
Float64) as __value] - AggregateExec: mode=FinalPartitioned, gby=[l_partkey@0 as l_partkey], aggr=[AVG(lineitem.l_quantity)] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "l_partkey", index: 0 }], 4)) + ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "l_partkey", index: 0 }], 4)) Query Stage #4 (1 -> 1): -ProjectionExec: expr=[CAST(SUM(lineitem.l_extendedprice)@0 AS Float64) / 7 as avg_yearly] - AggregateExec: mode=Final, gby=[], aggr=[SUM(lineitem.l_extendedprice)] +ProjectionExec: expr=[CAST(sum(lineitem.l_extendedprice)@0 AS Float64) / 7 as avg_yearly] + AggregateExec: mode=Final, gby=[], aggr=[sum(lineitem.l_extendedprice)] CoalescePartitionsExec ShuffleReaderExec(stage_id=3, input_partitioning=Hash([], 4)) diff --git a/testdata/expected-plans/q18.txt b/testdata/expected-plans/q18.txt index f2ae3d8..75e1d62 100644 --- a/testdata/expected-plans/q18.txt +++ b/testdata/expected-plans/q18.txt @@ -3,7 +3,7 @@ DataFusion Logical Plan Limit: skip=0, fetch=100 Sort: orders.o_totalprice DESC NULLS FIRST, orders.o_orderdate ASC NULLS LAST, fetch=100 - Aggregate: groupBy=[[customer.c_name, customer.c_custkey, orders.o_orderkey, orders.o_orderdate, orders.o_totalprice]], aggr=[[SUM(lineitem.l_quantity)]] + Aggregate: groupBy=[[customer.c_name, customer.c_custkey, orders.o_orderkey, orders.o_orderdate, orders.o_totalprice]], aggr=[[sum(lineitem.l_quantity)]] LeftSemi Join: orders.o_orderkey = __correlated_sq_1.l_orderkey Projection: customer.c_custkey, customer.c_name, orders.o_orderkey, orders.o_totalprice, orders.o_orderdate, lineitem.l_quantity Inner Join: orders.o_orderkey = lineitem.l_orderkey @@ -13,107 +13,103 @@ Limit: skip=0, fetch=100 TableScan: orders projection=[o_orderkey, o_custkey, o_totalprice, o_orderdate] TableScan: lineitem projection=[l_orderkey, l_quantity] SubqueryAlias: __correlated_sq_1 - Projection: lineitem.l_orderkey AS l_orderkey - Filter: 
SUM(lineitem.l_quantity) > Decimal128(Some(31300),21,2) - Aggregate: groupBy=[[lineitem.l_orderkey]], aggr=[[SUM(lineitem.l_quantity)]] + Projection: lineitem.l_orderkey + Filter: sum(lineitem.l_quantity) > Decimal128(Some(31300),21,2) + Aggregate: groupBy=[[lineitem.l_orderkey]], aggr=[[sum(lineitem.l_quantity)]] TableScan: lineitem projection=[l_orderkey, l_quantity] DataFusion Physical Plan ======================== GlobalLimitExec: skip=0, fetch=100 - SortPreservingMergeExec: [o_totalprice@4 DESC,o_orderdate@3 ASC NULLS LAST] - SortExec: fetch=100, expr=[o_totalprice@4 DESC,o_orderdate@3 ASC NULLS LAST] - AggregateExec: mode=FinalPartitioned, gby=[c_name@0 as c_name, c_custkey@1 as c_custkey, o_orderkey@2 as o_orderkey, o_orderdate@3 as o_orderdate, o_totalprice@4 as o_totalprice], aggr=[SUM(lineitem.l_quantity)] + SortPreservingMergeExec: [o_totalprice@4 DESC,o_orderdate@3 ASC NULLS LAST], fetch=100 + SortExec: TopK(fetch=100), expr=[o_totalprice@4 DESC,o_orderdate@3 ASC NULLS LAST], preserve_partitioning=[true] + AggregateExec: mode=FinalPartitioned, gby=[c_name@0 as c_name, c_custkey@1 as c_custkey, o_orderkey@2 as o_orderkey, o_orderdate@3 as o_orderdate, o_totalprice@4 as o_totalprice], aggr=[sum(lineitem.l_quantity)] CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "c_name", index: 0 }, Column { name: "c_custkey", index: 1 }, Column { name: "o_orderkey", index: 2 }, Column { name: "o_orderdate", index: 3 }, Column { name: "o_totalprice", index: 4 }], 4), input_partitions=4 - AggregateExec: mode=Partial, gby=[c_name@1 as c_name, c_custkey@0 as c_custkey, o_orderkey@2 as o_orderkey, o_orderdate@4 as o_orderdate, o_totalprice@3 as o_totalprice], aggr=[SUM(lineitem.l_quantity)] + RepartitionExec: partitioning=Hash([c_name@0, c_custkey@1, o_orderkey@2, o_orderdate@3, o_totalprice@4], 4), input_partitions=4 + AggregateExec: mode=Partial, gby=[c_name@1 as c_name, c_custkey@0 as c_custkey, o_orderkey@2 as o_orderkey, 
o_orderdate@4 as o_orderdate, o_totalprice@3 as o_totalprice], aggr=[sum(lineitem.l_quantity)] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=LeftSemi, on=[(Column { name: "o_orderkey", index: 2 }, Column { name: "l_orderkey", index: 0 })] - ProjectionExec: expr=[c_custkey@0 as c_custkey, c_name@1 as c_name, o_orderkey@2 as o_orderkey, o_totalprice@3 as o_totalprice, o_orderdate@4 as o_orderdate, l_quantity@6 as l_quantity] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "o_orderkey", index: 2 }, Column { name: "l_orderkey", index: 0 })] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "o_orderkey", index: 2 }], 4), input_partitions=4 - ProjectionExec: expr=[c_custkey@0 as c_custkey, c_name@1 as c_name, o_orderkey@2 as o_orderkey, o_totalprice@4 as o_totalprice, o_orderdate@5 as o_orderdate] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "c_custkey", index: 0 }, Column { name: "o_custkey", index: 1 })] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4), input_partitions=4 - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-19.parquet, 
mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-3.parquet]]}, projection=[c_custkey, c_name] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "o_custkey", index: 1 }], 4), input_partitions=4 - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-23.parquet, 
mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-3.parquet]]}, projection=[o_orderkey, o_custkey, o_totalprice, o_orderdate] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4), input_partitions=4 - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-9.parquet, 
mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-3.parquet]]}, projection=[l_orderkey, l_quantity] + HashJoinExec: mode=Partitioned, join_type=RightSemi, on=[(l_orderkey@0, o_orderkey@2)] ProjectionExec: expr=[l_orderkey@0 as l_orderkey] CoalesceBatchesExec: target_batch_size=8192 - FilterExec: SUM(lineitem.l_quantity)@1 > Some(31300),21,2 - AggregateExec: mode=FinalPartitioned, gby=[l_orderkey@0 as l_orderkey], aggr=[SUM(lineitem.l_quantity)] + FilterExec: sum(lineitem.l_quantity)@1 > Some(31300),21,2 + AggregateExec: mode=FinalPartitioned, gby=[l_orderkey@0 as l_orderkey], aggr=[sum(lineitem.l_quantity)] CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4), input_partitions=4 - AggregateExec: mode=Partial, gby=[l_orderkey@0 as l_orderkey], aggr=[SUM(lineitem.l_quantity)] - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-0.parquet, 
mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-3.parquet]]}, projection=[l_orderkey, l_quantity] + RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4 + AggregateExec: mode=Partial, gby=[l_orderkey@0 as l_orderkey], aggr=[sum(lineitem.l_quantity)] + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_quantity] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@2, l_orderkey@0)], projection=[c_custkey@0, c_name@1, o_orderkey@2, o_totalprice@3, o_orderdate@4, l_quantity@6] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([o_orderkey@2], 4), input_partitions=4 + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: 
mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, c_name@1, o_orderkey@2, o_totalprice@4, o_orderdate@5] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([c_custkey@0], 4), input_partitions=4 + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:0..3391370], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:3391370..6782740], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:6782740..6784675, home/edmondo/Development/datafusion-ray/data/customer.parquet/part2.parquet:0..3389435], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part2.parquet:3389435..6780803]]}, projection=[c_custkey, c_name] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([o_custkey@1], 4), input_partitions=4 + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_custkey, o_totalprice, o_orderdate] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4 + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], 
[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_quantity] RaySQL Plan =========== Query Stage #0 (4 -> 4): -ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-3.parquet]]}, projection=[c_custkey, c_name] 
+ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) + AggregateExec: mode=Partial, gby=[l_orderkey@0 as l_orderkey], aggr=[sum(lineitem.l_quantity)] + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_quantity] Query Stage #1 (4 -> 4): -ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "o_custkey", index: 1 }], 4)) - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-9.parquet, 
mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-3.parquet]]}, projection=[o_orderkey, o_custkey, o_totalprice, o_orderdate] +ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:0..3391370], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:3391370..6782740], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:6782740..6784675, home/edmondo/Development/datafusion-ray/data/customer.parquet/part2.parquet:0..3389435], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part2.parquet:3389435..6780803]]}, projection=[c_custkey, c_name] Query Stage #2 (4 -> 4): -ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "o_orderkey", index: 2 }], 4)) - ProjectionExec: expr=[c_custkey@0 as c_custkey, c_name@1 as c_name, o_orderkey@2 as o_orderkey, o_totalprice@4 as o_totalprice, o_orderdate@5 as o_orderdate] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "c_custkey", index: 0 }, Column { name: "o_custkey", index: 1 })] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "o_custkey", index: 1 }], 4)) +ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "o_custkey", index: 1 
}], 4)) + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_custkey, o_totalprice, o_orderdate] Query Stage #3 (4 -> 4): -ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-18.parquet, 
mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-3.parquet]]}, projection=[l_orderkey, l_quantity] +ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "o_orderkey", index: 2 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, c_name@1, o_orderkey@2, o_totalprice@4, o_orderdate@5] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "o_custkey", index: 1 }], 4)) Query Stage #4 (4 -> 4): ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) - AggregateExec: mode=Partial, gby=[l_orderkey@0 as l_orderkey], aggr=[SUM(lineitem.l_quantity)] - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-0.parquet, 
mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-3.parquet]]}, projection=[l_orderkey, l_quantity] + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_quantity] Query Stage #5 (4 -> 4): ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "c_name", index: 0 }, Column { name: "c_custkey", index: 1 }, Column { name: "o_orderkey", index: 2 }, Column { name: "o_orderdate", index: 3 }, Column { name: "o_totalprice", index: 4 }], 4)) - AggregateExec: mode=Partial, gby=[c_name@1 as c_name, c_custkey@0 as c_custkey, o_orderkey@2 as o_orderkey, o_orderdate@4 as o_orderdate, o_totalprice@3 as o_totalprice], aggr=[SUM(lineitem.l_quantity)] + AggregateExec: mode=Partial, gby=[c_name@1 as c_name, c_custkey@0 as c_custkey, 
o_orderkey@2 as o_orderkey, o_orderdate@4 as o_orderdate, o_totalprice@3 as o_totalprice], aggr=[sum(lineitem.l_quantity)] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=LeftSemi, on=[(Column { name: "o_orderkey", index: 2 }, Column { name: "l_orderkey", index: 0 })] - ProjectionExec: expr=[c_custkey@0 as c_custkey, c_name@1 as c_name, o_orderkey@2 as o_orderkey, o_totalprice@3 as o_totalprice, o_orderdate@4 as o_orderdate, l_quantity@6 as l_quantity] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "o_orderkey", index: 2 }, Column { name: "l_orderkey", index: 0 })] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "o_orderkey", index: 2 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) + HashJoinExec: mode=Partitioned, join_type=RightSemi, on=[(l_orderkey@0, o_orderkey@2)] ProjectionExec: expr=[l_orderkey@0 as l_orderkey] CoalesceBatchesExec: target_batch_size=8192 - FilterExec: SUM(lineitem.l_quantity)@1 > Some(31300),21,2 - AggregateExec: mode=FinalPartitioned, gby=[l_orderkey@0 as l_orderkey], aggr=[SUM(lineitem.l_quantity)] + FilterExec: sum(lineitem.l_quantity)@1 > Some(31300),21,2 + AggregateExec: mode=FinalPartitioned, gby=[l_orderkey@0 as l_orderkey], aggr=[sum(lineitem.l_quantity)] CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=4, input_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) + ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@2, l_orderkey@0)], projection=[c_custkey@0, c_name@1, o_orderkey@2, o_totalprice@3, o_orderdate@4, l_quantity@6] + CoalesceBatchesExec: 
target_batch_size=8192 + ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "o_orderkey", index: 2 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=4, input_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) Query Stage #6 (4 -> 4): ShuffleWriterExec(stage_id=6, output_partitioning=Hash([Column { name: "c_name", index: 0 }, Column { name: "c_custkey", index: 1 }, Column { name: "o_orderkey", index: 2 }, Column { name: "o_orderdate", index: 3 }, Column { name: "o_totalprice", index: 4 }], 4)) - SortExec: fetch=100, expr=[o_totalprice@4 DESC,o_orderdate@3 ASC NULLS LAST] - AggregateExec: mode=FinalPartitioned, gby=[c_name@0 as c_name, c_custkey@1 as c_custkey, o_orderkey@2 as o_orderkey, o_orderdate@3 as o_orderdate, o_totalprice@4 as o_totalprice], aggr=[SUM(lineitem.l_quantity)] + SortExec: TopK(fetch=100), expr=[o_totalprice@4 DESC,o_orderdate@3 ASC NULLS LAST], preserve_partitioning=[true] + AggregateExec: mode=FinalPartitioned, gby=[c_name@0 as c_name, c_custkey@1 as c_custkey, o_orderkey@2 as o_orderkey, o_orderdate@3 as o_orderdate, o_totalprice@4 as o_totalprice], aggr=[sum(lineitem.l_quantity)] CoalesceBatchesExec: target_batch_size=8192 ShuffleReaderExec(stage_id=5, input_partitioning=Hash([Column { name: "c_name", index: 0 }, Column { name: "c_custkey", index: 1 }, Column { name: "o_orderkey", index: 2 }, Column { name: "o_orderdate", index: 3 }, Column { name: "o_totalprice", index: 4 }], 4)) Query Stage #7 (1 -> 1): GlobalLimitExec: skip=0, fetch=100 - SortPreservingMergeExec: [o_totalprice@4 DESC,o_orderdate@3 ASC NULLS LAST] + SortPreservingMergeExec: [o_totalprice@4 DESC,o_orderdate@3 ASC NULLS LAST], fetch=100 ShuffleReaderExec(stage_id=6, input_partitioning=Hash([Column { name: "c_name", index: 0 }, Column { name: "c_custkey", index: 1 }, Column { name: "o_orderkey", index: 2 }, Column { name: "o_orderdate", index: 3 }, Column { name: "o_totalprice", index: 4 }], 4)) diff --git 
a/testdata/expected-plans/q19.txt b/testdata/expected-plans/q19.txt index e61791f..4571ed0 100644 --- a/testdata/expected-plans/q19.txt +++ b/testdata/expected-plans/q19.txt @@ -1,76 +1,67 @@ DataFusion Logical Plan ======================= -Projection: SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS revenue - Aggregate: groupBy=[[]], aggr=[[SUM(CAST(lineitem.l_extendedprice AS Decimal128(35, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(35, 4))) AS SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]] +Projection: sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS revenue + Aggregate: groupBy=[[]], aggr=[[sum(lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - lineitem.l_discount)) AS sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]] Projection: lineitem.l_extendedprice, lineitem.l_discount - Filter: part.p_brand = Utf8("Brand#21") AND part.p_container IN ([Utf8("SM CASE"), Utf8("SM BOX"), Utf8("SM PACK"), Utf8("SM PKG")]) AND lineitem.l_quantity >= Decimal128(Some(800),11,2) AND lineitem.l_quantity <= Decimal128(Some(1800),11,2) AND part.p_size <= Int32(5) OR part.p_brand = Utf8("Brand#13") AND part.p_container IN ([Utf8("MED BAG"), Utf8("MED BOX"), Utf8("MED PKG"), Utf8("MED PACK")]) AND lineitem.l_quantity >= Decimal128(Some(2000),11,2) AND lineitem.l_quantity <= Decimal128(Some(3000),11,2) AND part.p_size <= Int32(10) OR part.p_brand = Utf8("Brand#52") AND part.p_container IN ([Utf8("LG CASE"), Utf8("LG BOX"), Utf8("LG PACK"), Utf8("LG PKG")]) AND lineitem.l_quantity >= Decimal128(Some(3000),11,2) AND lineitem.l_quantity <= Decimal128(Some(4000),11,2) AND part.p_size <= Int32(15) - Projection: lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount, part.p_brand, part.p_size, part.p_container - Inner Join: lineitem.l_partkey = part.p_partkey - Projection: lineitem.l_partkey, lineitem.l_quantity, lineitem.l_extendedprice, 
lineitem.l_discount - Filter: (lineitem.l_quantity >= Decimal128(Some(800),11,2) AND lineitem.l_quantity <= Decimal128(Some(1800),11,2) OR lineitem.l_quantity >= Decimal128(Some(2000),11,2) AND lineitem.l_quantity <= Decimal128(Some(3000),11,2) OR lineitem.l_quantity >= Decimal128(Some(3000),11,2) AND lineitem.l_quantity <= Decimal128(Some(4000),11,2)) AND (lineitem.l_shipmode = Utf8("AIR REG") OR lineitem.l_shipmode = Utf8("AIR")) AND lineitem.l_shipinstruct = Utf8("DELIVER IN PERSON") - TableScan: lineitem projection=[l_partkey, l_quantity, l_extendedprice, l_discount, l_shipinstruct, l_shipmode], partial_filters=[lineitem.l_shipmode = Utf8("AIR REG") OR lineitem.l_shipmode = Utf8("AIR"), lineitem.l_shipinstruct = Utf8("DELIVER IN PERSON"), lineitem.l_quantity >= Decimal128(Some(800),11,2) AND lineitem.l_quantity <= Decimal128(Some(1800),11,2) OR lineitem.l_quantity >= Decimal128(Some(2000),11,2) AND lineitem.l_quantity <= Decimal128(Some(3000),11,2) OR lineitem.l_quantity >= Decimal128(Some(3000),11,2) AND lineitem.l_quantity <= Decimal128(Some(4000),11,2)] - Filter: (part.p_brand = Utf8("Brand#21") AND part.p_container IN ([Utf8("SM CASE"), Utf8("SM BOX"), Utf8("SM PACK"), Utf8("SM PKG")]) AND part.p_size <= Int32(5) OR part.p_brand = Utf8("Brand#13") AND part.p_container IN ([Utf8("MED BAG"), Utf8("MED BOX"), Utf8("MED PKG"), Utf8("MED PACK")]) AND part.p_size <= Int32(10) OR part.p_brand = Utf8("Brand#52") AND part.p_container IN ([Utf8("LG CASE"), Utf8("LG BOX"), Utf8("LG PACK"), Utf8("LG PKG")]) AND part.p_size <= Int32(15)) AND part.p_size >= Int32(1) - TableScan: part projection=[p_partkey, p_brand, p_size, p_container], partial_filters=[part.p_size >= Int32(1), part.p_brand = Utf8("Brand#21") AND part.p_container IN ([Utf8("SM CASE"), Utf8("SM BOX"), Utf8("SM PACK"), Utf8("SM PKG")]) AND part.p_size <= Int32(5) OR part.p_brand = Utf8("Brand#13") AND part.p_container IN ([Utf8("MED BAG"), Utf8("MED BOX"), Utf8("MED PKG"), Utf8("MED PACK")]) AND 
part.p_size <= Int32(10) OR part.p_brand = Utf8("Brand#52") AND part.p_container IN ([Utf8("LG CASE"), Utf8("LG BOX"), Utf8("LG PACK"), Utf8("LG PKG")]) AND part.p_size <= Int32(15)] + Inner Join: lineitem.l_partkey = part.p_partkey Filter: part.p_brand = Utf8("Brand#21") AND part.p_container IN ([Utf8("SM CASE"), Utf8("SM BOX"), Utf8("SM PACK"), Utf8("SM PKG")]) AND lineitem.l_quantity >= Decimal128(Some(800),11,2) AND lineitem.l_quantity <= Decimal128(Some(1800),11,2) AND part.p_size <= Int32(5) OR part.p_brand = Utf8("Brand#13") AND part.p_container IN ([Utf8("MED BAG"), Utf8("MED BOX"), Utf8("MED PKG"), Utf8("MED PACK")]) AND lineitem.l_quantity >= Decimal128(Some(2000),11,2) AND lineitem.l_quantity <= Decimal128(Some(3000),11,2) AND part.p_size <= Int32(10) OR part.p_brand = Utf8("Brand#52") AND part.p_container IN ([Utf8("LG CASE"), Utf8("LG BOX"), Utf8("LG PACK"), Utf8("LG PKG")]) AND lineitem.l_quantity >= Decimal128(Some(3000),11,2) AND lineitem.l_quantity <= Decimal128(Some(4000),11,2) AND part.p_size <= Int32(15) + Projection: lineitem.l_partkey, lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount + Filter: (lineitem.l_quantity >= Decimal128(Some(800),11,2) AND lineitem.l_quantity <= Decimal128(Some(1800),11,2) OR lineitem.l_quantity >= Decimal128(Some(2000),11,2) AND lineitem.l_quantity <= Decimal128(Some(3000),11,2) OR lineitem.l_quantity >= Decimal128(Some(3000),11,2) AND lineitem.l_quantity <= Decimal128(Some(4000),11,2)) AND (lineitem.l_shipmode = Utf8("AIR") OR lineitem.l_shipmode = Utf8("AIR REG")) AND lineitem.l_shipinstruct = Utf8("DELIVER IN PERSON") + TableScan: lineitem projection=[l_partkey, l_quantity, l_extendedprice, l_discount, l_shipinstruct, l_shipmode], partial_filters=[lineitem.l_shipmode = Utf8("AIR") OR lineitem.l_shipmode = Utf8("AIR REG"), lineitem.l_shipinstruct = Utf8("DELIVER IN PERSON"), lineitem.l_quantity >= Decimal128(Some(800),11,2) AND lineitem.l_quantity <= Decimal128(Some(1800),11,2) OR 
lineitem.l_quantity >= Decimal128(Some(2000),11,2) AND lineitem.l_quantity <= Decimal128(Some(3000),11,2) OR lineitem.l_quantity >= Decimal128(Some(3000),11,2) AND lineitem.l_quantity <= Decimal128(Some(4000),11,2)] + Filter: (part.p_brand = Utf8("Brand#21") AND part.p_container IN ([Utf8("SM CASE"), Utf8("SM BOX"), Utf8("SM PACK"), Utf8("SM PKG")]) AND part.p_size <= Int32(5) OR part.p_brand = Utf8("Brand#13") AND part.p_container IN ([Utf8("MED BAG"), Utf8("MED BOX"), Utf8("MED PKG"), Utf8("MED PACK")]) AND part.p_size <= Int32(10) OR part.p_brand = Utf8("Brand#52") AND part.p_container IN ([Utf8("LG CASE"), Utf8("LG BOX"), Utf8("LG PACK"), Utf8("LG PKG")]) AND part.p_size <= Int32(15)) AND part.p_size >= Int32(1) + TableScan: part projection=[p_partkey, p_brand, p_size, p_container], partial_filters=[part.p_size >= Int32(1), part.p_brand = Utf8("Brand#21") AND part.p_container IN ([Utf8("SM CASE"), Utf8("SM BOX"), Utf8("SM PACK"), Utf8("SM PKG")]) AND part.p_size <= Int32(5) OR part.p_brand = Utf8("Brand#13") AND part.p_container IN ([Utf8("MED BAG"), Utf8("MED BOX"), Utf8("MED PKG"), Utf8("MED PACK")]) AND part.p_size <= Int32(10) OR part.p_brand = Utf8("Brand#52") AND part.p_container IN ([Utf8("LG CASE"), Utf8("LG BOX"), Utf8("LG PACK"), Utf8("LG PKG")]) AND part.p_size <= Int32(15)] DataFusion Physical Plan ======================== -ProjectionExec: expr=[SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@0 as revenue] - AggregateExec: mode=Final, gby=[], aggr=[SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] +ProjectionExec: expr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@0 as revenue] + AggregateExec: mode=Final, gby=[], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] CoalescePartitionsExec - AggregateExec: mode=Partial, gby=[], aggr=[SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] - ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] - 
CoalesceBatchesExec: target_batch_size=8192 - FilterExec: p_brand@3 = Brand#21 AND Use p_container@5 IN (SET) ([Literal { value: Utf8("SM CASE") }, Literal { value: Utf8("SM BOX") }, Literal { value: Utf8("SM PACK") }, Literal { value: Utf8("SM PKG") }]) AND l_quantity@0 >= Some(800),11,2 AND l_quantity@0 <= Some(1800),11,2 AND p_size@4 <= 5 OR p_brand@3 = Brand#13 AND Use p_container@5 IN (SET) ([Literal { value: Utf8("MED BAG") }, Literal { value: Utf8("MED BOX") }, Literal { value: Utf8("MED PKG") }, Literal { value: Utf8("MED PACK") }]) AND l_quantity@0 >= Some(2000),11,2 AND l_quantity@0 <= Some(3000),11,2 AND p_size@4 <= 10 OR p_brand@3 = Brand#52 AND Use p_container@5 IN (SET) ([Literal { value: Utf8("LG CASE") }, Literal { value: Utf8("LG BOX") }, Literal { value: Utf8("LG PACK") }, Literal { value: Utf8("LG PKG") }]) AND l_quantity@0 >= Some(3000),11,2 AND l_quantity@0 <= Some(4000),11,2 AND p_size@4 <= 15 - ProjectionExec: expr=[l_quantity@1 as l_quantity, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, p_brand@5 as p_brand, p_size@6 as p_size, p_container@7 as p_container] + AggregateExec: mode=Partial, gby=[], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, l_partkey@0)], filter=p_brand@1 = Brand#21 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("SM CASE") }, Literal { value: Utf8("SM BOX") }, Literal { value: Utf8("SM PACK") }, Literal { value: Utf8("SM PKG") }]) AND l_quantity@0 >= Some(800),11,2 AND l_quantity@0 <= Some(1800),11,2 AND p_size@2 <= 5 OR p_brand@1 = Brand#13 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("MED BAG") }, Literal { value: Utf8("MED BOX") }, Literal { value: Utf8("MED PKG") }, Literal { value: Utf8("MED PACK") }]) AND l_quantity@0 >= Some(2000),11,2 AND l_quantity@0 <= Some(3000),11,2 AND p_size@2 <= 10 OR p_brand@1 = Brand#52 AND Use p_container@3 IN (SET) 
([Literal { value: Utf8("LG CASE") }, Literal { value: Utf8("LG BOX") }, Literal { value: Utf8("LG PACK") }, Literal { value: Utf8("LG PKG") }]) AND l_quantity@0 >= Some(3000),11,2 AND l_quantity@0 <= Some(4000),11,2 AND p_size@2 <= 15, projection=[l_extendedprice@6, l_discount@7] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([p_partkey@0], 4), input_partitions=4 CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "l_partkey", index: 0 }, Column { name: "p_partkey", index: 0 })] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "l_partkey", index: 0 }], 4), input_partitions=4 - ProjectionExec: expr=[l_partkey@0 as l_partkey, l_quantity@1 as l_quantity, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: (l_quantity@1 >= Some(800),11,2 AND l_quantity@1 <= Some(1800),11,2 OR l_quantity@1 >= Some(2000),11,2 AND l_quantity@1 <= Some(3000),11,2 OR l_quantity@1 >= Some(3000),11,2 AND l_quantity@1 <= Some(4000),11,2) AND (l_shipmode@5 = AIR REG OR l_shipmode@5 = AIR) AND l_shipinstruct@4 = DELIVER IN PERSON - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-0.parquet, 
mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-3.parquet]]}, predicate=(l_shipmode@14 = AIR REG OR l_shipmode@14 = AIR) AND l_shipinstruct@13 = DELIVER IN PERSON AND (l_quantity@4 >= Some(800),11,2 AND l_quantity@4 <= Some(1800),11,2 OR l_quantity@4 >= Some(2000),11,2 AND l_quantity@4 <= Some(3000),11,2 OR l_quantity@4 >= Some(3000),11,2 AND l_quantity@4 <= Some(4000),11,2), pruning_predicate=(l_shipmode_min@0 <= AIR REG AND AIR REG <= l_shipmode_max@1 OR l_shipmode_min@0 <= AIR AND AIR <= l_shipmode_max@1) AND l_shipinstruct_min@2 <= DELIVER IN PERSON AND DELIVER IN PERSON <= l_shipinstruct_max@3 AND (l_quantity_max@4 >= Some(800),11,2 AND l_quantity_min@5 <= Some(1800),11,2 OR l_quantity_max@4 >= Some(2000),11,2 AND l_quantity_min@5 <= Some(3000),11,2 OR l_quantity_max@4 >= Some(3000),11,2 AND l_quantity_min@5 <= Some(4000),11,2), projection=[l_partkey, l_quantity, l_extendedprice, l_discount, l_shipinstruct, l_shipmode] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4), input_partitions=4 - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: (p_brand@1 = Brand#21 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("SM 
CASE") }, Literal { value: Utf8("SM BOX") }, Literal { value: Utf8("SM PACK") }, Literal { value: Utf8("SM PKG") }]) AND p_size@2 <= 5 OR p_brand@1 = Brand#13 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("MED BAG") }, Literal { value: Utf8("MED BOX") }, Literal { value: Utf8("MED PKG") }, Literal { value: Utf8("MED PACK") }]) AND p_size@2 <= 10 OR p_brand@1 = Brand#52 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("LG CASE") }, Literal { value: Utf8("LG BOX") }, Literal { value: Utf8("LG PACK") }, Literal { value: Utf8("LG PKG") }]) AND p_size@2 <= 15) AND p_size@2 >= 1 - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/part.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/part.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/part.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/part.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-15.parquet, 
mnt/bigdata/tpch/sf10-parquet/part.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-3.parquet]]}, predicate=p_size@5 >= 1 AND (p_brand@3 = Brand#21 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("SM CASE") }, Literal { value: Utf8("SM BOX") }, Literal { value: Utf8("SM PACK") }, Literal { value: Utf8("SM PKG") }]) AND p_size@5 <= 5 OR p_brand@3 = Brand#13 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("MED BAG") }, Literal { value: Utf8("MED BOX") }, Literal { value: Utf8("MED PKG") }, Literal { value: Utf8("MED PACK") }]) AND p_size@5 <= 10 OR p_brand@3 = Brand#52 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("LG CASE") }, Literal { value: Utf8("LG BOX") }, Literal { value: Utf8("LG PACK") }, Literal { value: Utf8("LG PKG") }]) AND p_size@5 <= 15), pruning_predicate=p_size_max@0 >= 1 AND (p_brand_min@1 <= Brand#21 AND Brand#21 <= p_brand_max@2 AND (p_container_min@3 <= SM CASE AND SM CASE <= p_container_max@4 OR p_container_min@3 <= SM BOX AND SM BOX <= p_container_max@4 OR p_container_min@3 <= SM PACK AND SM PACK <= p_container_max@4 OR p_container_min@3 <= SM PKG AND SM PKG <= p_container_max@4) AND p_size_min@5 <= 5 OR p_brand_min@1 <= Brand#13 AND Brand#13 <= p_brand_max@2 AND (p_container_min@3 <= MED BAG AND MED BAG <= p_container_max@4 OR p_container_min@3 <= MED BOX AND MED BOX <= p_container_max@4 OR p_container_min@3 <= MED PKG AND MED PKG <= p_container_max@4 OR p_container_min@3 <= MED PACK AND MED PACK <= p_container_max@4) AND p_size_min@5 <= 10 OR p_brand_min@1 <= Brand#52 AND Brand#52 <= p_brand_max@2 AND (p_container_min@3 <= LG CASE AND LG CASE <= p_container_max@4 OR p_container_min@3 <= LG BOX AND LG BOX <= p_container_max@4 OR p_container_min@3 <= LG PACK AND LG PACK <= p_container_max@4 OR p_container_min@3 <= LG PKG AND LG PKG <= p_container_max@4) AND p_size_min@5 <= 15), projection=[p_partkey, p_brand, p_size, p_container] + FilterExec: (p_brand@1 = Brand#21 AND Use p_container@3 IN (SET) 
([Literal { value: Utf8("SM CASE") }, Literal { value: Utf8("SM BOX") }, Literal { value: Utf8("SM PACK") }, Literal { value: Utf8("SM PKG") }]) AND p_size@2 <= 5 OR p_brand@1 = Brand#13 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("MED BAG") }, Literal { value: Utf8("MED BOX") }, Literal { value: Utf8("MED PKG") }, Literal { value: Utf8("MED PACK") }]) AND p_size@2 <= 10 OR p_brand@1 = Brand#52 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("LG CASE") }, Literal { value: Utf8("LG BOX") }, Literal { value: Utf8("LG PACK") }, Literal { value: Utf8("LG PKG") }]) AND p_size@2 <= 15) AND p_size@2 >= 1 + RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 + ParquetExec: file_groups={2 groups: [[home/edmondo/Development/datafusion-ray/data/part.parquet/part1.parquet], [home/edmondo/Development/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_brand, p_size, p_container], predicate=p_size@5 >= 1 AND (p_brand@3 = Brand#21 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("SM CASE") }, Literal { value: Utf8("SM BOX") }, Literal { value: Utf8("SM PACK") }, Literal { value: Utf8("SM PKG") }]) AND p_size@5 <= 5 OR p_brand@3 = Brand#13 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("MED BAG") }, Literal { value: Utf8("MED BOX") }, Literal { value: Utf8("MED PKG") }, Literal { value: Utf8("MED PACK") }]) AND p_size@5 <= 10 OR p_brand@3 = Brand#52 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("LG CASE") }, Literal { value: Utf8("LG BOX") }, Literal { value: Utf8("LG PACK") }, Literal { value: Utf8("LG PKG") }]) AND p_size@5 <= 15), pruning_predicate=CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_max@0 >= 1 END AND (CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#21 AND Brand#21 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM CASE AND SM CASE <= 
p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM BOX AND SM BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM PACK AND SM PACK <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM PKG AND SM PKG <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 5 END OR CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#13 AND Brand#13 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED BAG AND MED BAG <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED BOX AND MED BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED PKG AND MED PKG <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED PACK AND MED PACK <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 10 END OR CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#52 AND Brand#52 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG CASE AND LG CASE <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG BOX AND LG BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG PACK AND LG PACK <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = 
p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG PKG AND LG PKG <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 15 END), required_guarantees=[] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([l_partkey@0], 4), input_partitions=4 + ProjectionExec: expr=[l_partkey@0 as l_partkey, l_quantity@1 as l_quantity, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: (l_quantity@1 >= Some(800),11,2 AND l_quantity@1 <= Some(1800),11,2 OR l_quantity@1 >= Some(2000),11,2 AND l_quantity@1 <= Some(3000),11,2 OR l_quantity@1 >= Some(3000),11,2 AND l_quantity@1 <= Some(4000),11,2) AND (l_shipmode@5 = AIR OR l_shipmode@5 = AIR REG) AND l_shipinstruct@4 = DELIVER IN PERSON + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_partkey, l_quantity, l_extendedprice, l_discount, l_shipinstruct, l_shipmode], predicate=(l_shipmode@14 = AIR OR l_shipmode@14 = AIR REG) AND l_shipinstruct@13 = DELIVER IN PERSON AND (l_quantity@4 >= Some(800),11,2 AND l_quantity@4 <= Some(1800),11,2 OR l_quantity@4 >= Some(2000),11,2 AND l_quantity@4 <= Some(3000),11,2 OR l_quantity@4 >= Some(3000),11,2 AND l_quantity@4 <= Some(4000),11,2), pruning_predicate=(CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= AIR AND AIR <= l_shipmode_max@1 END OR CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false 
ELSE l_shipmode_min@0 <= AIR REG AND AIR REG <= l_shipmode_max@1 END) AND CASE WHEN l_shipinstruct_null_count@6 = l_shipinstruct_row_count@7 THEN false ELSE l_shipinstruct_min@4 <= DELIVER IN PERSON AND DELIVER IN PERSON <= l_shipinstruct_max@5 END AND (CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(800),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(1800),11,2 END OR CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(2000),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(3000),11,2 END OR CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(3000),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(4000),11,2 END), required_guarantees=[l_shipmode in (AIR REG, AIR), l_shipinstruct in (DELIVER IN PERSON)] RaySQL Plan =========== -Query Stage #0 (4 -> 4): -ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "l_partkey", index: 0 }], 4)) - ProjectionExec: expr=[l_partkey@0 as l_partkey, l_quantity@1 as l_quantity, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: (l_quantity@1 >= Some(800),11,2 AND l_quantity@1 <= Some(1800),11,2 OR l_quantity@1 >= Some(2000),11,2 AND l_quantity@1 <= Some(3000),11,2 OR l_quantity@1 >= Some(3000),11,2 AND l_quantity@1 <= Some(4000),11,2) AND (l_shipmode@5 = AIR REG OR l_shipmode@5 = AIR) AND l_shipinstruct@4 = DELIVER IN PERSON - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-11.parquet, 
mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-3.parquet]]}, predicate=(l_shipmode@14 = AIR REG OR l_shipmode@14 = AIR) AND l_shipinstruct@13 = DELIVER IN PERSON AND (l_quantity@4 >= Some(800),11,2 AND l_quantity@4 <= Some(1800),11,2 OR l_quantity@4 >= Some(2000),11,2 AND l_quantity@4 <= Some(3000),11,2 OR l_quantity@4 >= Some(3000),11,2 AND l_quantity@4 <= Some(4000),11,2), pruning_predicate=(l_shipmode_min@0 <= AIR REG AND AIR REG <= l_shipmode_max@1 OR l_shipmode_min@0 <= AIR AND AIR <= l_shipmode_max@1) AND l_shipinstruct_min@2 <= DELIVER IN PERSON AND DELIVER IN PERSON <= l_shipinstruct_max@3 AND (l_quantity_max@4 >= Some(800),11,2 AND l_quantity_min@5 <= Some(1800),11,2 OR l_quantity_max@4 >= 
Some(2000),11,2 AND l_quantity_min@5 <= Some(3000),11,2 OR l_quantity_max@4 >= Some(3000),11,2 AND l_quantity_min@5 <= Some(4000),11,2), projection=[l_partkey, l_quantity, l_extendedprice, l_discount, l_shipinstruct, l_shipmode] - -Query Stage #1 (4 -> 4): -ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) +Query Stage #0 (2 -> 4): +ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) CoalesceBatchesExec: target_batch_size=8192 FilterExec: (p_brand@1 = Brand#21 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("SM CASE") }, Literal { value: Utf8("SM BOX") }, Literal { value: Utf8("SM PACK") }, Literal { value: Utf8("SM PKG") }]) AND p_size@2 <= 5 OR p_brand@1 = Brand#13 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("MED BAG") }, Literal { value: Utf8("MED BOX") }, Literal { value: Utf8("MED PKG") }, Literal { value: Utf8("MED PACK") }]) AND p_size@2 <= 10 OR p_brand@1 = Brand#52 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("LG CASE") }, Literal { value: Utf8("LG BOX") }, Literal { value: Utf8("LG PACK") }, Literal { value: Utf8("LG PKG") }]) AND p_size@2 <= 15) AND p_size@2 >= 1 - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/part.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/part.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-16.parquet], 
[mnt/bigdata/tpch/sf10-parquet/part.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/part.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-3.parquet]]}, predicate=p_size@5 >= 1 AND (p_brand@3 = Brand#21 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("SM CASE") }, Literal { value: Utf8("SM BOX") }, Literal { value: Utf8("SM PACK") }, Literal { value: Utf8("SM PKG") }]) AND p_size@5 <= 5 OR p_brand@3 = Brand#13 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("MED BAG") }, Literal { value: Utf8("MED BOX") }, Literal { value: Utf8("MED PKG") }, Literal { value: Utf8("MED PACK") }]) AND p_size@5 <= 10 OR p_brand@3 = Brand#52 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("LG CASE") }, Literal { value: Utf8("LG BOX") }, Literal { value: Utf8("LG PACK") }, Literal { value: Utf8("LG PKG") }]) AND p_size@5 <= 15), pruning_predicate=p_size_max@0 >= 1 AND (p_brand_min@1 <= Brand#21 AND Brand#21 <= p_brand_max@2 AND (p_container_min@3 <= SM CASE AND SM CASE <= p_container_max@4 OR p_container_min@3 <= SM BOX AND SM BOX <= p_container_max@4 OR p_container_min@3 <= SM PACK AND SM PACK <= p_container_max@4 OR p_container_min@3 <= SM PKG AND SM PKG <= p_container_max@4) AND p_size_min@5 <= 5 OR p_brand_min@1 <= Brand#13 AND Brand#13 <= p_brand_max@2 AND (p_container_min@3 <= MED BAG AND MED BAG <= p_container_max@4 OR p_container_min@3 <= MED BOX AND MED BOX <= p_container_max@4 OR p_container_min@3 <= 
MED PKG AND MED PKG <= p_container_max@4 OR p_container_min@3 <= MED PACK AND MED PACK <= p_container_max@4) AND p_size_min@5 <= 10 OR p_brand_min@1 <= Brand#52 AND Brand#52 <= p_brand_max@2 AND (p_container_min@3 <= LG CASE AND LG CASE <= p_container_max@4 OR p_container_min@3 <= LG BOX AND LG BOX <= p_container_max@4 OR p_container_min@3 <= LG PACK AND LG PACK <= p_container_max@4 OR p_container_min@3 <= LG PKG AND LG PKG <= p_container_max@4) AND p_size_min@5 <= 15), projection=[p_partkey, p_brand, p_size, p_container] + ParquetExec: file_groups={2 groups: [[home/edmondo/Development/datafusion-ray/data/part.parquet/part1.parquet], [home/edmondo/Development/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_brand, p_size, p_container], predicate=p_size@5 >= 1 AND (p_brand@3 = Brand#21 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("SM CASE") }, Literal { value: Utf8("SM BOX") }, Literal { value: Utf8("SM PACK") }, Literal { value: Utf8("SM PKG") }]) AND p_size@5 <= 5 OR p_brand@3 = Brand#13 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("MED BAG") }, Literal { value: Utf8("MED BOX") }, Literal { value: Utf8("MED PKG") }, Literal { value: Utf8("MED PACK") }]) AND p_size@5 <= 10 OR p_brand@3 = Brand#52 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("LG CASE") }, Literal { value: Utf8("LG BOX") }, Literal { value: Utf8("LG PACK") }, Literal { value: Utf8("LG PKG") }]) AND p_size@5 <= 15), pruning_predicate=CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_max@0 >= 1 END AND (CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#21 AND Brand#21 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM CASE AND SM CASE <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM BOX AND SM BOX <= p_container_max@8 END OR 
CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM PACK AND SM PACK <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM PKG AND SM PKG <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 5 END OR CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#13 AND Brand#13 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED BAG AND MED BAG <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED BOX AND MED BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED PKG AND MED PKG <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED PACK AND MED PACK <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 10 END OR CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#52 AND Brand#52 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG CASE AND LG CASE <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG BOX AND LG BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG PACK AND LG PACK <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG PKG AND LG PKG <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE 
p_size_min@11 <= 15 END), required_guarantees=[] + +Query Stage #1 (4 -> 4): +ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "l_partkey", index: 0 }], 4)) + ProjectionExec: expr=[l_partkey@0 as l_partkey, l_quantity@1 as l_quantity, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: (l_quantity@1 >= Some(800),11,2 AND l_quantity@1 <= Some(1800),11,2 OR l_quantity@1 >= Some(2000),11,2 AND l_quantity@1 <= Some(3000),11,2 OR l_quantity@1 >= Some(3000),11,2 AND l_quantity@1 <= Some(4000),11,2) AND (l_shipmode@5 = AIR OR l_shipmode@5 = AIR REG) AND l_shipinstruct@4 = DELIVER IN PERSON + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_partkey, l_quantity, l_extendedprice, l_discount, l_shipinstruct, l_shipmode], predicate=(l_shipmode@14 = AIR OR l_shipmode@14 = AIR REG) AND l_shipinstruct@13 = DELIVER IN PERSON AND (l_quantity@4 >= Some(800),11,2 AND l_quantity@4 <= Some(1800),11,2 OR l_quantity@4 >= Some(2000),11,2 AND l_quantity@4 <= Some(3000),11,2 OR l_quantity@4 >= Some(3000),11,2 AND l_quantity@4 <= Some(4000),11,2), pruning_predicate=(CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= AIR AND AIR <= l_shipmode_max@1 END OR CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= AIR REG AND AIR REG <= l_shipmode_max@1 END) AND CASE WHEN l_shipinstruct_null_count@6 = l_shipinstruct_row_count@7 THEN false ELSE 
l_shipinstruct_min@4 <= DELIVER IN PERSON AND DELIVER IN PERSON <= l_shipinstruct_max@5 END AND (CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(800),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(1800),11,2 END OR CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(2000),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(3000),11,2 END OR CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(3000),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(4000),11,2 END), required_guarantees=[l_shipmode in (AIR REG, AIR), l_shipinstruct in (DELIVER IN PERSON)] -Query Stage #2 (4 -> 4): +Query Stage #2 (4 -> 1): ShuffleWriterExec(stage_id=2, output_partitioning=Hash([], 4)) - AggregateExec: mode=Partial, gby=[], aggr=[SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] - ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: p_brand@3 = Brand#21 AND Use p_container@5 IN (SET) ([Literal { value: Utf8("SM CASE") }, Literal { value: Utf8("SM BOX") }, Literal { value: Utf8("SM PACK") }, Literal { value: Utf8("SM PKG") }]) AND l_quantity@0 >= Some(800),11,2 AND l_quantity@0 <= Some(1800),11,2 AND p_size@4 <= 5 OR p_brand@3 = Brand#13 AND Use p_container@5 IN (SET) ([Literal { value: Utf8("MED BAG") }, Literal { value: Utf8("MED BOX") }, Literal { value: Utf8("MED PKG") }, Literal { value: Utf8("MED PACK") }]) AND l_quantity@0 >= Some(2000),11,2 AND l_quantity@0 <= Some(3000),11,2 AND p_size@4 <= 10 OR p_brand@3 = Brand#52 AND Use p_container@5 IN (SET) ([Literal { value: Utf8("LG CASE") }, Literal { value: Utf8("LG BOX") }, Literal { 
value: Utf8("LG PACK") }, Literal { value: Utf8("LG PKG") }]) AND l_quantity@0 >= Some(3000),11,2 AND l_quantity@0 <= Some(4000),11,2 AND p_size@4 <= 15 - ProjectionExec: expr=[l_quantity@1 as l_quantity, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, p_brand@5 as p_brand, p_size@6 as p_size, p_container@7 as p_container] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "l_partkey", index: 0 }, Column { name: "p_partkey", index: 0 })] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "l_partkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) + AggregateExec: mode=Partial, gby=[], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, l_partkey@0)], filter=p_brand@1 = Brand#21 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("SM CASE") }, Literal { value: Utf8("SM BOX") }, Literal { value: Utf8("SM PACK") }, Literal { value: Utf8("SM PKG") }]) AND l_quantity@0 >= Some(800),11,2 AND l_quantity@0 <= Some(1800),11,2 AND p_size@2 <= 5 OR p_brand@1 = Brand#13 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("MED BAG") }, Literal { value: Utf8("MED BOX") }, Literal { value: Utf8("MED PKG") }, Literal { value: Utf8("MED PACK") }]) AND l_quantity@0 >= Some(2000),11,2 AND l_quantity@0 <= Some(3000),11,2 AND p_size@2 <= 10 OR p_brand@1 = Brand#52 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("LG CASE") }, Literal { value: Utf8("LG BOX") }, Literal { value: Utf8("LG PACK") }, Literal { value: Utf8("LG PKG") }]) AND l_quantity@0 >= Some(3000),11,2 AND l_quantity@0 <= Some(4000),11,2 AND p_size@2 <= 15, projection=[l_extendedprice@6, l_discount@7] + 
CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "l_partkey", index: 0 }], 4)) Query Stage #3 (1 -> 1): -ProjectionExec: expr=[SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@0 as revenue] - AggregateExec: mode=Final, gby=[], aggr=[SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] +ProjectionExec: expr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@0 as revenue] + AggregateExec: mode=Final, gby=[], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] CoalescePartitionsExec ShuffleReaderExec(stage_id=2, input_partitioning=Hash([], 4)) diff --git a/testdata/expected-plans/q2.txt b/testdata/expected-plans/q2.txt index 64df0f1..9b08cf5 100644 --- a/testdata/expected-plans/q2.txt +++ b/testdata/expected-plans/q2.txt @@ -4,12 +4,12 @@ DataFusion Logical Plan Limit: skip=0, fetch=100 Sort: supplier.s_acctbal DESC NULLS FIRST, nation.n_name ASC NULLS LAST, supplier.s_name ASC NULLS LAST, part.p_partkey ASC NULLS LAST, fetch=100 Projection: supplier.s_acctbal, supplier.s_name, nation.n_name, part.p_partkey, part.p_mfgr, supplier.s_address, supplier.s_phone, supplier.s_comment - Inner Join: part.p_partkey = __scalar_sq_1.ps_partkey, partsupp.ps_supplycost = __scalar_sq_1.__value - Projection: part.p_partkey, part.p_mfgr, partsupp.ps_supplycost, supplier.s_name, supplier.s_address, supplier.s_phone, supplier.s_acctbal, supplier.s_comment, nation.n_name + Inner Join: part.p_partkey = __scalar_sq_1.ps_partkey, partsupp.ps_supplycost = __scalar_sq_1.min(partsupp.ps_supplycost) + Projection: part.p_partkey, part.p_mfgr, supplier.s_name, supplier.s_address, supplier.s_phone, supplier.s_acctbal, supplier.s_comment, partsupp.ps_supplycost, nation.n_name Inner Join: nation.n_regionkey = region.r_regionkey - 
Projection: part.p_partkey, part.p_mfgr, partsupp.ps_supplycost, supplier.s_name, supplier.s_address, supplier.s_phone, supplier.s_acctbal, supplier.s_comment, nation.n_name, nation.n_regionkey + Projection: part.p_partkey, part.p_mfgr, supplier.s_name, supplier.s_address, supplier.s_phone, supplier.s_acctbal, supplier.s_comment, partsupp.ps_supplycost, nation.n_name, nation.n_regionkey Inner Join: supplier.s_nationkey = nation.n_nationkey - Projection: part.p_partkey, part.p_mfgr, partsupp.ps_supplycost, supplier.s_name, supplier.s_address, supplier.s_nationkey, supplier.s_phone, supplier.s_acctbal, supplier.s_comment + Projection: part.p_partkey, part.p_mfgr, supplier.s_name, supplier.s_address, supplier.s_nationkey, supplier.s_phone, supplier.s_acctbal, supplier.s_comment, partsupp.ps_supplycost Inner Join: partsupp.ps_suppkey = supplier.s_suppkey Projection: part.p_partkey, part.p_mfgr, partsupp.ps_suppkey, partsupp.ps_supplycost Inner Join: part.p_partkey = partsupp.ps_partkey @@ -23,8 +23,8 @@ Limit: skip=0, fetch=100 Filter: region.r_name = Utf8("ASIA") TableScan: region projection=[r_regionkey, r_name], partial_filters=[region.r_name = Utf8("ASIA")] SubqueryAlias: __scalar_sq_1 - Projection: partsupp.ps_partkey, MIN(partsupp.ps_supplycost) AS __value - Aggregate: groupBy=[[partsupp.ps_partkey]], aggr=[[MIN(partsupp.ps_supplycost)]] + Projection: min(partsupp.ps_supplycost), partsupp.ps_partkey + Aggregate: groupBy=[[partsupp.ps_partkey]], aggr=[[min(partsupp.ps_supplycost)]] Projection: partsupp.ps_partkey, partsupp.ps_supplycost Inner Join: nation.n_regionkey = region.r_regionkey Projection: partsupp.ps_partkey, partsupp.ps_supplycost, nation.n_regionkey @@ -42,233 +42,226 @@ DataFusion Physical Plan ======================== GlobalLimitExec: skip=0, fetch=100 - SortPreservingMergeExec: [s_acctbal@0 DESC,n_name@2 ASC NULLS LAST,s_name@1 ASC NULLS LAST,p_partkey@3 ASC NULLS LAST] - SortExec: fetch=100, expr=[s_acctbal@0 DESC,n_name@2 ASC NULLS LAST,s_name@1 
ASC NULLS LAST,p_partkey@3 ASC NULLS LAST] - ProjectionExec: expr=[s_acctbal@6 as s_acctbal, s_name@3 as s_name, n_name@8 as n_name, p_partkey@0 as p_partkey, p_mfgr@1 as p_mfgr, s_address@4 as s_address, s_phone@5 as s_phone, s_comment@7 as s_comment] + SortPreservingMergeExec: [s_acctbal@0 DESC,n_name@2 ASC NULLS LAST,s_name@1 ASC NULLS LAST,p_partkey@3 ASC NULLS LAST], fetch=100 + SortExec: TopK(fetch=100), expr=[s_acctbal@0 DESC,n_name@2 ASC NULLS LAST,s_name@1 ASC NULLS LAST,p_partkey@3 ASC NULLS LAST], preserve_partitioning=[true] + ProjectionExec: expr=[s_acctbal@5 as s_acctbal, s_name@2 as s_name, n_name@7 as n_name, p_partkey@0 as p_partkey, p_mfgr@1 as p_mfgr, s_address@3 as s_address, s_phone@4 as s_phone, s_comment@6 as s_comment] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "p_partkey", index: 0 }, Column { name: "ps_partkey", index: 0 }), (Column { name: "ps_supplycost", index: 2 }, Column { name: "__value", index: 1 })] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, ps_partkey@1), (ps_supplycost@7, min(partsupp.ps_supplycost)@0)], projection=[p_partkey@0, p_mfgr@1, s_name@2, s_address@3, s_phone@4, s_acctbal@5, s_comment@6, n_name@8] CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "p_partkey", index: 0 }, Column { name: "ps_supplycost", index: 2 }], 4), input_partitions=4 - ProjectionExec: expr=[p_partkey@0 as p_partkey, p_mfgr@1 as p_mfgr, ps_supplycost@2 as ps_supplycost, s_name@3 as s_name, s_address@4 as s_address, s_phone@5 as s_phone, s_acctbal@6 as s_acctbal, s_comment@7 as s_comment, n_name@8 as n_name] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "n_regionkey", index: 9 }, Column { name: "r_regionkey", index: 0 })] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "n_regionkey", 
index: 9 }], 4), input_partitions=4 - ProjectionExec: expr=[p_partkey@0 as p_partkey, p_mfgr@1 as p_mfgr, ps_supplycost@2 as ps_supplycost, s_name@3 as s_name, s_address@4 as s_address, s_phone@6 as s_phone, s_acctbal@7 as s_acctbal, s_comment@8 as s_comment, n_name@10 as n_name, n_regionkey@11 as n_regionkey] + RepartitionExec: partitioning=Hash([p_partkey@0, ps_supplycost@7], 4), input_partitions=4 + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(r_regionkey@0, n_regionkey@9)], projection=[p_partkey@1, p_mfgr@2, s_name@3, s_address@4, s_phone@5, s_acctbal@6, s_comment@7, ps_supplycost@8, n_name@9] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([r_regionkey@0], 4), input_partitions=4 + RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 + ProjectionExec: expr=[r_regionkey@0 as r_regionkey] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "s_nationkey", index: 5 }, Column { name: "n_nationkey", index: 0 })] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "s_nationkey", index: 5 }], 4), input_partitions=4 - ProjectionExec: expr=[p_partkey@0 as p_partkey, p_mfgr@1 as p_mfgr, ps_supplycost@3 as ps_supplycost, s_name@5 as s_name, s_address@6 as s_address, s_nationkey@7 as s_nationkey, s_phone@8 as s_phone, s_acctbal@9 as s_acctbal, s_comment@10 as s_comment] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "ps_suppkey", index: 2 }, Column { name: "s_suppkey", index: 0 })] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "ps_suppkey", index: 2 }], 4), input_partitions=4 - ProjectionExec: expr=[p_partkey@0 as p_partkey, p_mfgr@1 as p_mfgr, ps_suppkey@3 as ps_suppkey, ps_supplycost@4 as ps_supplycost] - CoalesceBatchesExec: 
target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "p_partkey", index: 0 }, Column { name: "ps_partkey", index: 0 })] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4), input_partitions=4 - ProjectionExec: expr=[p_partkey@0 as p_partkey, p_mfgr@1 as p_mfgr] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: p_size@3 = 48 AND p_type@2 LIKE %TIN - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/part.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/part.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/part.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/part.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-3.parquet]]}, predicate=p_size@5 = 48 AND 
p_type@4 LIKE %TIN, pruning_predicate=p_size_min@0 <= 48 AND 48 <= p_size_max@1, projection=[p_partkey, p_mfgr, p_type, p_size] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 4), input_partitions=4 - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-3.parquet]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: 
partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4), input_partitions=4 - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-3.parquet]]}, projection=[s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4), input_partitions=1 - ParquetExec: limit=None, partitions={1 group: 
[[mnt/bigdata/tpch/sf10-parquet/nation.parquet/part-0.parquet]]}, projection=[n_nationkey, n_name, n_regionkey] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "r_regionkey", index: 0 }], 4), input_partitions=4 - RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 - ProjectionExec: expr=[r_regionkey@0 as r_regionkey] + FilterExec: r_name@1 = ASIA + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/region.parquet/part1.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = ASIA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= ASIA AND ASIA <= r_name_max@1 END, required_guarantees=[r_name in (ASIA)] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([n_regionkey@9], 4), input_partitions=4 + ProjectionExec: expr=[p_partkey@2 as p_partkey, p_mfgr@3 as p_mfgr, s_name@4 as s_name, s_address@5 as s_address, s_phone@6 as s_phone, s_acctbal@7 as s_acctbal, s_comment@8 as s_comment, ps_supplycost@9 as ps_supplycost, n_name@0 as n_name, n_regionkey@1 as n_regionkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@4)], projection=[n_name@1, n_regionkey@2, p_partkey@3, p_mfgr@4, s_name@5, s_address@6, s_phone@8, s_acctbal@9, s_comment@10, ps_supplycost@11] CoalesceBatchesExec: target_batch_size=8192 - FilterExec: r_name@1 = ASIA - ParquetExec: limit=None, partitions={1 group: [[mnt/bigdata/tpch/sf10-parquet/region.parquet/part-0.parquet]]}, predicate=r_name@1 = ASIA, pruning_predicate=r_name_min@0 <= ASIA AND ASIA <= r_name_max@1, projection=[r_regionkey, r_name] + RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=1 + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name, n_regionkey] + 
CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([s_nationkey@4], 4), input_partitions=4 + ProjectionExec: expr=[p_partkey@6 as p_partkey, p_mfgr@7 as p_mfgr, s_name@0 as s_name, s_address@1 as s_address, s_nationkey@2 as s_nationkey, s_phone@3 as s_phone, s_acctbal@4 as s_acctbal, s_comment@5 as s_comment, ps_supplycost@8 as ps_supplycost] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@2)], projection=[s_name@1, s_address@2, s_nationkey@3, s_phone@4, s_acctbal@5, s_comment@6, p_partkey@7, p_mfgr@8, ps_supplycost@10] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=4 + RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 + ParquetExec: file_groups={2 groups: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet/part1.parquet], [home/edmondo/Development/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([ps_suppkey@2], 4), input_partitions=4 + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, ps_partkey@0)], projection=[p_partkey@0, p_mfgr@1, ps_suppkey@3, ps_supplycost@4] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([p_partkey@0], 4), input_partitions=4 + ProjectionExec: expr=[p_partkey@0 as p_partkey, p_mfgr@1 as p_mfgr] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: p_size@3 = 48 AND p_type@2 LIKE %TIN + RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 + ParquetExec: file_groups={2 groups: [[home/edmondo/Development/datafusion-ray/data/part.parquet/part1.parquet], [home/edmondo/Development/datafusion-ray/data/part.parquet/part2.parquet]]}, 
projection=[p_partkey, p_mfgr, p_type, p_size], predicate=p_size@5 = 48 AND p_type@4 LIKE %TIN, pruning_predicate=CASE WHEN p_size_null_count@2 = p_size_row_count@3 THEN false ELSE p_size_min@0 <= 48 AND 48 <= p_size_max@1 END, required_guarantees=[p_size in (48)] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([ps_partkey@0], 4), input_partitions=4 + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10449977], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part1.parquet:10449977..20896603, home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:0..3351], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:3351..10453328], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:10453328..20903305]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "ps_partkey", index: 0 }, Column { name: "__value", index: 1 }], 4), input_partitions=4 - ProjectionExec: expr=[ps_partkey@0 as ps_partkey, MIN(partsupp.ps_supplycost)@1 as __value] - AggregateExec: mode=FinalPartitioned, gby=[ps_partkey@0 as ps_partkey], aggr=[MIN(partsupp.ps_supplycost)] + RepartitionExec: partitioning=Hash([ps_partkey@1, min(partsupp.ps_supplycost)@0], 4), input_partitions=4 + ProjectionExec: expr=[min(partsupp.ps_supplycost)@1 as min(partsupp.ps_supplycost), ps_partkey@0 as ps_partkey] + AggregateExec: mode=FinalPartitioned, gby=[ps_partkey@0 as ps_partkey], aggr=[min(partsupp.ps_supplycost)] CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 4), input_partitions=4 - AggregateExec: mode=Partial, gby=[ps_partkey@0 as ps_partkey], aggr=[MIN(partsupp.ps_supplycost)] - ProjectionExec: expr=[ps_partkey@0 as ps_partkey, ps_supplycost@1 as ps_supplycost] - 
CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "n_regionkey", index: 2 }, Column { name: "r_regionkey", index: 0 })] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "n_regionkey", index: 2 }], 4), input_partitions=4 - ProjectionExec: expr=[ps_partkey@0 as ps_partkey, ps_supplycost@1 as ps_supplycost, n_regionkey@4 as n_regionkey] - ProjectionExec: expr=[ps_partkey@2 as ps_partkey, ps_supplycost@3 as ps_supplycost, s_nationkey@4 as s_nationkey, n_nationkey@0 as n_nationkey, n_regionkey@1 as n_regionkey] + RepartitionExec: partitioning=Hash([ps_partkey@0], 4), input_partitions=4 + AggregateExec: mode=Partial, gby=[ps_partkey@0 as ps_partkey], aggr=[min(partsupp.ps_supplycost)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(r_regionkey@0, n_regionkey@2)], projection=[ps_partkey@1, ps_supplycost@2] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([r_regionkey@0], 4), input_partitions=4 + RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 + ProjectionExec: expr=[r_regionkey@0 as r_regionkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: r_name@1 = ASIA + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/region.parquet/part1.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = ASIA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= ASIA AND ASIA <= r_name_max@1 END, required_guarantees=[r_name in (ASIA)] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([n_regionkey@2], 4), input_partitions=4 + ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_supplycost@2 as ps_supplycost, n_regionkey@0 as n_regionkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, 
on=[(n_nationkey@0, s_nationkey@2)], projection=[n_regionkey@1, ps_partkey@2, ps_supplycost@3] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "n_nationkey", index: 0 }, Column { name: "s_nationkey", index: 2 })] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4), input_partitions=1 - ParquetExec: limit=None, partitions={1 group: [[mnt/bigdata/tpch/sf10-parquet/nation.parquet/part-0.parquet]]}, projection=[n_nationkey, n_regionkey] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "s_nationkey", index: 2 }], 4), input_partitions=4 - ProjectionExec: expr=[ps_partkey@0 as ps_partkey, ps_supplycost@2 as ps_supplycost, s_nationkey@4 as s_nationkey] - ProjectionExec: expr=[ps_partkey@2 as ps_partkey, ps_suppkey@3 as ps_suppkey, ps_supplycost@4 as ps_supplycost, s_suppkey@0 as s_suppkey, s_nationkey@1 as s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "s_suppkey", index: 0 }, Column { name: "ps_suppkey", index: 1 })] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4), input_partitions=4 - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-22.parquet, 
mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-3.parquet]]}, projection=[s_suppkey, s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "ps_suppkey", index: 1 }], 4), input_partitions=4 - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-16.parquet], 
[mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-3.parquet]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "r_regionkey", index: 0 }], 4), input_partitions=4 - RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 - ProjectionExec: expr=[r_regionkey@0 as r_regionkey] + RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=1 + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_regionkey] CoalesceBatchesExec: target_batch_size=8192 - FilterExec: r_name@1 = ASIA - ParquetExec: limit=None, partitions={1 group: [[mnt/bigdata/tpch/sf10-parquet/region.parquet/part-0.parquet]]}, predicate=r_name@1 = ASIA, pruning_predicate=r_name_min@0 <= ASIA AND ASIA <= r_name_max@1, projection=[r_regionkey, r_name] + RepartitionExec: partitioning=Hash([s_nationkey@2], 4), input_partitions=4 + ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_supplycost@2 as ps_supplycost, s_nationkey@0 as s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@1)], projection=[s_nationkey@1, 
ps_partkey@2, ps_supplycost@4] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=4 + RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 + ParquetExec: file_groups={2 groups: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet/part1.parquet], [home/edmondo/Development/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([ps_suppkey@1], 4), input_partitions=4 + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10449977], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part1.parquet:10449977..20896603, home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:0..3351], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:3351..10453328], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:10453328..20903305]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] RaySQL Plan =========== -Query Stage #0 (4 -> 4): -ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) - ProjectionExec: expr=[p_partkey@0 as p_partkey, p_mfgr@1 as p_mfgr] +Query Stage #0 (1 -> 4): +ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "r_regionkey", index: 0 }], 4)) + ProjectionExec: expr=[r_regionkey@0 as r_regionkey] CoalesceBatchesExec: target_batch_size=8192 - FilterExec: p_size@3 = 48 AND p_type@2 LIKE %TIN - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/part.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-6.parquet, 
mnt/bigdata/tpch/sf10-parquet/part.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/part.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/part.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/part.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-3.parquet]]}, predicate=p_size@5 = 48 AND p_type@4 LIKE %TIN, pruning_predicate=p_size_min@0 <= 48 AND 48 <= p_size_max@1, projection=[p_partkey, p_mfgr, p_type, p_size] + FilterExec: r_name@1 = ASIA + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/region.parquet/part1.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = ASIA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= ASIA AND ASIA <= r_name_max@1 END, required_guarantees=[r_name in (ASIA)] -Query Stage #1 (4 -> 4): -ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 4)) - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-10.parquet, 
mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-3.parquet]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] +Query Stage #1 (1 -> 4): +ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name, n_regionkey] -Query Stage #2 (4 -> 4): -ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "ps_suppkey", index: 2 }], 4)) - ProjectionExec: expr=[p_partkey@0 as p_partkey, p_mfgr@1 as p_mfgr, ps_suppkey@3 as ps_suppkey, ps_supplycost@4 as 
ps_supplycost] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "p_partkey", index: 0 }, Column { name: "ps_partkey", index: 0 })] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 4)) +Query Stage #2 (2 -> 4): +ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) + ParquetExec: file_groups={2 groups: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet/part1.parquet], [home/edmondo/Development/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment] -Query Stage #3 (4 -> 4): -ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-23.parquet, 
mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-3.parquet]]}, projection=[s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment] +Query Stage #3 (2 -> 4): +ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) + ProjectionExec: expr=[p_partkey@0 as p_partkey, p_mfgr@1 as p_mfgr] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: p_size@3 = 48 AND p_type@2 LIKE %TIN + ParquetExec: file_groups={2 groups: [[home/edmondo/Development/datafusion-ray/data/part.parquet/part1.parquet], [home/edmondo/Development/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_mfgr, p_type, p_size], predicate=p_size@5 = 48 AND p_type@4 LIKE %TIN, pruning_predicate=CASE WHEN p_size_null_count@2 = p_size_row_count@3 THEN false ELSE p_size_min@0 <= 48 AND 48 <= p_size_max@1 END, required_guarantees=[p_size in (48)] Query Stage #4 (4 -> 4): -ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "s_nationkey", index: 5 }], 4)) - ProjectionExec: expr=[p_partkey@0 as p_partkey, p_mfgr@1 as p_mfgr, ps_supplycost@3 as ps_supplycost, s_name@5 as s_name, s_address@6 as s_address, s_nationkey@7 as s_nationkey, s_phone@8 as s_phone, s_acctbal@9 as s_acctbal, s_comment@10 as s_comment] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "ps_suppkey", index: 2 
}, Column { name: "s_suppkey", index: 0 })] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "ps_suppkey", index: 2 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) +ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 4)) + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10449977], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part1.parquet:10449977..20896603, home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:0..3351], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:3351..10453328], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:10453328..20903305]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] -Query Stage #5 (1 -> 4): -ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) - ParquetExec: limit=None, partitions={1 group: [[mnt/bigdata/tpch/sf10-parquet/nation.parquet/part-0.parquet]]}, projection=[n_nationkey, n_name, n_regionkey] +Query Stage #5 (4 -> 4): +ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "ps_suppkey", index: 2 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, ps_partkey@0)], projection=[p_partkey@0, p_mfgr@1, ps_suppkey@3, ps_supplycost@4] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=4, input_partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 4)) Query Stage #6 (4 -> 4): -ShuffleWriterExec(stage_id=6, 
output_partitioning=Hash([Column { name: "n_regionkey", index: 9 }], 4)) - ProjectionExec: expr=[p_partkey@0 as p_partkey, p_mfgr@1 as p_mfgr, ps_supplycost@2 as ps_supplycost, s_name@3 as s_name, s_address@4 as s_address, s_phone@6 as s_phone, s_acctbal@7 as s_acctbal, s_comment@8 as s_comment, n_name@10 as n_name, n_regionkey@11 as n_regionkey] +ShuffleWriterExec(stage_id=6, output_partitioning=Hash([Column { name: "s_nationkey", index: 4 }], 4)) + ProjectionExec: expr=[p_partkey@6 as p_partkey, p_mfgr@7 as p_mfgr, s_name@0 as s_name, s_address@1 as s_address, s_nationkey@2 as s_nationkey, s_phone@3 as s_phone, s_acctbal@4 as s_acctbal, s_comment@5 as s_comment, ps_supplycost@8 as ps_supplycost] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "s_nationkey", index: 5 }, Column { name: "n_nationkey", index: 0 })] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@2)], projection=[s_name@1, s_address@2, s_nationkey@3, s_phone@4, s_acctbal@5, s_comment@6, p_partkey@7, p_mfgr@8, ps_supplycost@10] CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=4, input_partitioning=Hash([Column { name: "s_nationkey", index: 5 }], 4)) + ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=5, input_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) + ShuffleReaderExec(stage_id=5, input_partitioning=Hash([Column { name: "ps_suppkey", index: 2 }], 4)) -Query Stage #7 (1 -> 4): -ShuffleWriterExec(stage_id=7, output_partitioning=Hash([Column { name: "r_regionkey", index: 0 }], 4)) - ProjectionExec: expr=[r_regionkey@0 as r_regionkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: r_name@1 = ASIA - ParquetExec: limit=None, partitions={1 group: [[mnt/bigdata/tpch/sf10-parquet/region.parquet/part-0.parquet]]}, predicate=r_name@1 
= ASIA, pruning_predicate=r_name_min@0 <= ASIA AND ASIA <= r_name_max@1, projection=[r_regionkey, r_name] - -Query Stage #8 (4 -> 4): -ShuffleWriterExec(stage_id=8, output_partitioning=Hash([Column { name: "p_partkey", index: 0 }, Column { name: "ps_supplycost", index: 2 }], 4)) - ProjectionExec: expr=[p_partkey@0 as p_partkey, p_mfgr@1 as p_mfgr, ps_supplycost@2 as ps_supplycost, s_name@3 as s_name, s_address@4 as s_address, s_phone@5 as s_phone, s_acctbal@6 as s_acctbal, s_comment@7 as s_comment, n_name@8 as n_name] +Query Stage #7 (4 -> 4): +ShuffleWriterExec(stage_id=7, output_partitioning=Hash([Column { name: "n_regionkey", index: 9 }], 4)) + ProjectionExec: expr=[p_partkey@2 as p_partkey, p_mfgr@3 as p_mfgr, s_name@4 as s_name, s_address@5 as s_address, s_phone@6 as s_phone, s_acctbal@7 as s_acctbal, s_comment@8 as s_comment, ps_supplycost@9 as ps_supplycost, n_name@0 as n_name, n_regionkey@1 as n_regionkey] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "n_regionkey", index: 9 }, Column { name: "r_regionkey", index: 0 })] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@4)], projection=[n_name@1, n_regionkey@2, p_partkey@3, p_mfgr@4, s_name@5, s_address@6, s_phone@8, s_acctbal@9, s_comment@10, ps_supplycost@11] CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=6, input_partitioning=Hash([Column { name: "n_regionkey", index: 9 }], 4)) + ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=7, input_partitioning=Hash([Column { name: "r_regionkey", index: 0 }], 4)) + ShuffleReaderExec(stage_id=6, input_partitioning=Hash([Column { name: "s_nationkey", index: 4 }], 4)) + +Query Stage #8 (4 -> 4): +ShuffleWriterExec(stage_id=8, output_partitioning=Hash([Column { name: "p_partkey", index: 0 }, Column { name: 
"ps_supplycost", index: 7 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(r_regionkey@0, n_regionkey@9)], projection=[p_partkey@1, p_mfgr@2, s_name@3, s_address@4, s_phone@5, s_acctbal@6, s_comment@7, ps_supplycost@8, n_name@9] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "r_regionkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=7, input_partitioning=Hash([Column { name: "n_regionkey", index: 9 }], 4)) Query Stage #9 (1 -> 4): -ShuffleWriterExec(stage_id=9, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) - ParquetExec: limit=None, partitions={1 group: [[mnt/bigdata/tpch/sf10-parquet/nation.parquet/part-0.parquet]]}, projection=[n_nationkey, n_regionkey] +ShuffleWriterExec(stage_id=9, output_partitioning=Hash([Column { name: "r_regionkey", index: 0 }], 4)) + ProjectionExec: expr=[r_regionkey@0 as r_regionkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: r_name@1 = ASIA + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/region.parquet/part1.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = ASIA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= ASIA AND ASIA <= r_name_max@1 END, required_guarantees=[r_name in (ASIA)] -Query Stage #10 (4 -> 4): -ShuffleWriterExec(stage_id=10, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-6.parquet, 
mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-3.parquet]]}, projection=[s_suppkey, s_nationkey] +Query Stage #10 (1 -> 4): +ShuffleWriterExec(stage_id=10, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_regionkey] -Query Stage #11 (4 -> 4): -ShuffleWriterExec(stage_id=11, output_partitioning=Hash([Column { name: "ps_suppkey", index: 1 }], 4)) - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-13.parquet, 
mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-3.parquet]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] +Query Stage #11 (2 -> 4): +ShuffleWriterExec(stage_id=11, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) + ParquetExec: file_groups={2 groups: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet/part1.parquet], [home/edmondo/Development/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] Query Stage #12 (4 -> 4): -ShuffleWriterExec(stage_id=12, output_partitioning=Hash([Column { name: "s_nationkey", index: 2 }], 4)) - ProjectionExec: expr=[ps_partkey@0 as ps_partkey, ps_supplycost@2 as ps_supplycost, s_nationkey@4 as s_nationkey] - ProjectionExec: expr=[ps_partkey@2 as ps_partkey, 
ps_suppkey@3 as ps_suppkey, ps_supplycost@4 as ps_supplycost, s_suppkey@0 as s_suppkey, s_nationkey@1 as s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "s_suppkey", index: 0 }, Column { name: "ps_suppkey", index: 1 })] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=10, input_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=11, input_partitioning=Hash([Column { name: "ps_suppkey", index: 1 }], 4)) +ShuffleWriterExec(stage_id=12, output_partitioning=Hash([Column { name: "ps_suppkey", index: 1 }], 4)) + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10449977], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part1.parquet:10449977..20896603, home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:0..3351], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:3351..10453328], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:10453328..20903305]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] Query Stage #13 (4 -> 4): -ShuffleWriterExec(stage_id=13, output_partitioning=Hash([Column { name: "n_regionkey", index: 2 }], 4)) - ProjectionExec: expr=[ps_partkey@0 as ps_partkey, ps_supplycost@1 as ps_supplycost, n_regionkey@4 as n_regionkey] - ProjectionExec: expr=[ps_partkey@2 as ps_partkey, ps_supplycost@3 as ps_supplycost, s_nationkey@4 as s_nationkey, n_nationkey@0 as n_nationkey, n_regionkey@1 as n_regionkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "n_nationkey", index: 0 }, Column { name: "s_nationkey", index: 2 })] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=9, input_partitioning=Hash([Column { name: 
"n_nationkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=12, input_partitioning=Hash([Column { name: "s_nationkey", index: 2 }], 4)) +ShuffleWriterExec(stage_id=13, output_partitioning=Hash([Column { name: "s_nationkey", index: 2 }], 4)) + ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_supplycost@2 as ps_supplycost, s_nationkey@0 as s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@1)], projection=[s_nationkey@1, ps_partkey@2, ps_supplycost@4] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=11, input_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=12, input_partitioning=Hash([Column { name: "ps_suppkey", index: 1 }], 4)) -Query Stage #14 (1 -> 4): -ShuffleWriterExec(stage_id=14, output_partitioning=Hash([Column { name: "r_regionkey", index: 0 }], 4)) - ProjectionExec: expr=[r_regionkey@0 as r_regionkey] +Query Stage #14 (4 -> 4): +ShuffleWriterExec(stage_id=14, output_partitioning=Hash([Column { name: "n_regionkey", index: 2 }], 4)) + ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_supplycost@2 as ps_supplycost, n_regionkey@0 as n_regionkey] CoalesceBatchesExec: target_batch_size=8192 - FilterExec: r_name@1 = ASIA - ParquetExec: limit=None, partitions={1 group: [[mnt/bigdata/tpch/sf10-parquet/region.parquet/part-0.parquet]]}, predicate=r_name@1 = ASIA, pruning_predicate=r_name_min@0 <= ASIA AND ASIA <= r_name_max@1, projection=[r_regionkey, r_name] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[n_regionkey@1, ps_partkey@2, ps_supplycost@3] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=10, input_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + 
ShuffleReaderExec(stage_id=13, input_partitioning=Hash([Column { name: "s_nationkey", index: 2 }], 4)) Query Stage #15 (4 -> 4): ShuffleWriterExec(stage_id=15, output_partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 4)) - AggregateExec: mode=Partial, gby=[ps_partkey@0 as ps_partkey], aggr=[MIN(partsupp.ps_supplycost)] - ProjectionExec: expr=[ps_partkey@0 as ps_partkey, ps_supplycost@1 as ps_supplycost] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "n_regionkey", index: 2 }, Column { name: "r_regionkey", index: 0 })] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=13, input_partitioning=Hash([Column { name: "n_regionkey", index: 2 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=14, input_partitioning=Hash([Column { name: "r_regionkey", index: 0 }], 4)) + AggregateExec: mode=Partial, gby=[ps_partkey@0 as ps_partkey], aggr=[min(partsupp.ps_supplycost)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(r_regionkey@0, n_regionkey@2)], projection=[ps_partkey@1, ps_supplycost@2] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=9, input_partitioning=Hash([Column { name: "r_regionkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=14, input_partitioning=Hash([Column { name: "n_regionkey", index: 2 }], 4)) Query Stage #16 (4 -> 4): -ShuffleWriterExec(stage_id=16, output_partitioning=Hash([Column { name: "ps_partkey", index: 0 }, Column { name: "__value", index: 1 }], 4)) - ProjectionExec: expr=[ps_partkey@0 as ps_partkey, MIN(partsupp.ps_supplycost)@1 as __value] - AggregateExec: mode=FinalPartitioned, gby=[ps_partkey@0 as ps_partkey], aggr=[MIN(partsupp.ps_supplycost)] +ShuffleWriterExec(stage_id=16, output_partitioning=Hash([Column { name: "ps_partkey", index: 1 }, Column { name: 
"min(partsupp.ps_supplycost)", index: 0 }], 4)) + ProjectionExec: expr=[min(partsupp.ps_supplycost)@1 as min(partsupp.ps_supplycost), ps_partkey@0 as ps_partkey] + AggregateExec: mode=FinalPartitioned, gby=[ps_partkey@0 as ps_partkey], aggr=[min(partsupp.ps_supplycost)] CoalesceBatchesExec: target_batch_size=8192 ShuffleReaderExec(stage_id=15, input_partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 4)) Query Stage #17 (4 -> 4): ShuffleWriterExec(stage_id=17, output_partitioning=Hash([Column { name: "p_partkey", index: 3 }], 4)) - SortExec: fetch=100, expr=[s_acctbal@0 DESC,n_name@2 ASC NULLS LAST,s_name@1 ASC NULLS LAST,p_partkey@3 ASC NULLS LAST] - ProjectionExec: expr=[s_acctbal@6 as s_acctbal, s_name@3 as s_name, n_name@8 as n_name, p_partkey@0 as p_partkey, p_mfgr@1 as p_mfgr, s_address@4 as s_address, s_phone@5 as s_phone, s_comment@7 as s_comment] + SortExec: TopK(fetch=100), expr=[s_acctbal@0 DESC,n_name@2 ASC NULLS LAST,s_name@1 ASC NULLS LAST,p_partkey@3 ASC NULLS LAST], preserve_partitioning=[true] + ProjectionExec: expr=[s_acctbal@5 as s_acctbal, s_name@2 as s_name, n_name@7 as n_name, p_partkey@0 as p_partkey, p_mfgr@1 as p_mfgr, s_address@3 as s_address, s_phone@4 as s_phone, s_comment@6 as s_comment] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "p_partkey", index: 0 }, Column { name: "ps_partkey", index: 0 }), (Column { name: "ps_supplycost", index: 2 }, Column { name: "__value", index: 1 })] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, ps_partkey@1), (ps_supplycost@7, min(partsupp.ps_supplycost)@0)], projection=[p_partkey@0, p_mfgr@1, s_name@2, s_address@3, s_phone@4, s_acctbal@5, s_comment@6, n_name@8] CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=8, input_partitioning=Hash([Column { name: "p_partkey", index: 0 }, Column { name: "ps_supplycost", index: 2 }], 4)) + ShuffleReaderExec(stage_id=8, 
input_partitioning=Hash([Column { name: "p_partkey", index: 0 }, Column { name: "ps_supplycost", index: 7 }], 4)) CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=16, input_partitioning=Hash([Column { name: "ps_partkey", index: 0 }, Column { name: "__value", index: 1 }], 4)) + ShuffleReaderExec(stage_id=16, input_partitioning=Hash([Column { name: "ps_partkey", index: 1 }, Column { name: "min(partsupp.ps_supplycost)", index: 0 }], 4)) Query Stage #18 (1 -> 1): GlobalLimitExec: skip=0, fetch=100 - SortPreservingMergeExec: [s_acctbal@0 DESC,n_name@2 ASC NULLS LAST,s_name@1 ASC NULLS LAST,p_partkey@3 ASC NULLS LAST] + SortPreservingMergeExec: [s_acctbal@0 DESC,n_name@2 ASC NULLS LAST,s_name@1 ASC NULLS LAST,p_partkey@3 ASC NULLS LAST], fetch=100 ShuffleReaderExec(stage_id=17, input_partitioning=Hash([Column { name: "p_partkey", index: 3 }], 4)) diff --git a/testdata/expected-plans/q20.txt b/testdata/expected-plans/q20.txt index 3cdbf1c..8c9f1b5 100644 --- a/testdata/expected-plans/q20.txt +++ b/testdata/expected-plans/q20.txt @@ -11,154 +11,142 @@ Sort: supplier.s_name ASC NULLS LAST Filter: nation.n_name = Utf8("KENYA") TableScan: nation projection=[n_nationkey, n_name], partial_filters=[nation.n_name = Utf8("KENYA")] SubqueryAlias: __correlated_sq_1 - Projection: partsupp.ps_suppkey AS ps_suppkey - Filter: CAST(partsupp.ps_availqty AS Float64) > __scalar_sq_1.__value - Projection: partsupp.ps_suppkey, partsupp.ps_availqty, __scalar_sq_1.__value - Inner Join: partsupp.ps_partkey = __scalar_sq_1.l_partkey, partsupp.ps_suppkey = __scalar_sq_1.l_suppkey - LeftSemi Join: partsupp.ps_partkey = __correlated_sq_2.p_partkey - TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_availqty] - SubqueryAlias: __correlated_sq_2 - Projection: part.p_partkey AS p_partkey - Filter: part.p_name LIKE Utf8("blanched%") - TableScan: part projection=[p_partkey, p_name], partial_filters=[part.p_name LIKE Utf8("blanched%")] - SubqueryAlias: __scalar_sq_1 - 
Projection: lineitem.l_partkey, lineitem.l_suppkey, Float64(0.5) * CAST(SUM(lineitem.l_quantity) AS Float64) AS __value - Aggregate: groupBy=[[lineitem.l_partkey, lineitem.l_suppkey]], aggr=[[SUM(lineitem.l_quantity)]] - Projection: lineitem.l_partkey, lineitem.l_suppkey, lineitem.l_quantity - Filter: lineitem.l_shipdate >= Date32("8401") AND lineitem.l_shipdate < Date32("8766") - TableScan: lineitem projection=[l_partkey, l_suppkey, l_quantity, l_shipdate], partial_filters=[lineitem.l_shipdate >= Date32("8401"), lineitem.l_shipdate < Date32("8766")] + Projection: partsupp.ps_suppkey + Inner Join: partsupp.ps_partkey = __scalar_sq_3.l_partkey, partsupp.ps_suppkey = __scalar_sq_3.l_suppkey Filter: CAST(partsupp.ps_availqty AS Float64) > __scalar_sq_3.Float64(0.5) * sum(lineitem.l_quantity) + LeftSemi Join: partsupp.ps_partkey = __correlated_sq_2.p_partkey + TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_availqty] + SubqueryAlias: __correlated_sq_2 + Projection: part.p_partkey + Filter: part.p_name LIKE Utf8("blanched%") + TableScan: part projection=[p_partkey, p_name], partial_filters=[part.p_name LIKE Utf8("blanched%")] + SubqueryAlias: __scalar_sq_3 + Projection: Float64(0.5) * CAST(sum(lineitem.l_quantity) AS Float64), lineitem.l_partkey, lineitem.l_suppkey + Aggregate: groupBy=[[lineitem.l_partkey, lineitem.l_suppkey]], aggr=[[sum(lineitem.l_quantity)]] + Projection: lineitem.l_partkey, lineitem.l_suppkey, lineitem.l_quantity + Filter: lineitem.l_shipdate >= Date32("1993-01-01") AND lineitem.l_shipdate < Date32("1994-01-01") + TableScan: lineitem projection=[l_partkey, l_suppkey, l_quantity, l_shipdate], partial_filters=[lineitem.l_shipdate >= Date32("1993-01-01"), lineitem.l_shipdate < Date32("1994-01-01")] DataFusion Physical Plan ======================== SortPreservingMergeExec: [s_name@0 ASC NULLS LAST] - SortExec: expr=[s_name@0 ASC NULLS LAST] - ProjectionExec: expr=[s_name@1 as s_name, s_address@2 as s_address] - CoalesceBatchesExec: 
target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=LeftSemi, on=[(Column { name: "s_suppkey", index: 0 }, Column { name: "ps_suppkey", index: 0 })] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4), input_partitions=4 - ProjectionExec: expr=[s_suppkey@0 as s_suppkey, s_name@1 as s_name, s_address@2 as s_address] + SortExec: expr=[s_name@0 ASC NULLS LAST], preserve_partitioning=[true] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=LeftSemi, on=[(s_suppkey@0, ps_suppkey@0)], projection=[s_name@1, s_address@2] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=4 + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@3)], projection=[s_suppkey@1, s_name@2, s_address@3] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=4 + RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 + ProjectionExec: expr=[n_nationkey@0 as n_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: n_name@1 = KENYA + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = KENYA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= KENYA AND KENYA <= n_name_max@1 END, required_guarantees=[n_name in (KENYA)] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "s_nationkey", index: 3 }, Column { name: "n_nationkey", index: 0 })] + RepartitionExec: partitioning=Hash([s_nationkey@3], 4), input_partitions=4 + RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 + ParquetExec: file_groups={2 
groups: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet/part1.parquet], [home/edmondo/Development/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_name, s_address, s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([ps_suppkey@0], 4), input_partitions=4 + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(ps_partkey@0, l_partkey@1), (ps_suppkey@1, l_suppkey@2)], filter=CAST(ps_availqty@0 AS Float64) > Float64(0.5) * sum(lineitem.l_quantity)@1, projection=[ps_suppkey@1] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([ps_partkey@0, ps_suppkey@1], 4), input_partitions=4 CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "s_nationkey", index: 3 }], 4), input_partitions=4 - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-17.parquet, 
mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-3.parquet]]}, projection=[s_suppkey, s_name, s_address, s_nationkey] + HashJoinExec: mode=Partitioned, join_type=RightSemi, on=[(p_partkey@0, ps_partkey@0)] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([p_partkey@0], 4), input_partitions=4 + ProjectionExec: expr=[p_partkey@0 as p_partkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: p_name@1 LIKE blanched% + RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 + ParquetExec: file_groups={2 groups: [[home/edmondo/Development/datafusion-ray/data/part.parquet/part1.parquet], [home/edmondo/Development/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_name], predicate=p_name@1 LIKE blanched% + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([ps_partkey@0], 4), input_partitions=4 + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10449977], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part1.parquet:10449977..20896603, home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:0..3351], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:3351..10453328], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:10453328..20903305]]}, projection=[ps_partkey, ps_suppkey, ps_availqty] + ProjectionExec: expr=[0.5 * CAST(sum(lineitem.l_quantity)@2 AS Float64) as 
Float64(0.5) * sum(lineitem.l_quantity), l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey] + AggregateExec: mode=FinalPartitioned, gby=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey], aggr=[sum(lineitem.l_quantity)] CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4), input_partitions=4 - RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 - ProjectionExec: expr=[n_nationkey@0 as n_nationkey] + RepartitionExec: partitioning=Hash([l_partkey@0, l_suppkey@1], 4), input_partitions=4 + AggregateExec: mode=Partial, gby=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey], aggr=[sum(lineitem.l_quantity)] + ProjectionExec: expr=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey, l_quantity@2 as l_quantity] CoalesceBatchesExec: target_batch_size=8192 - FilterExec: n_name@1 = KENYA - ParquetExec: limit=None, partitions={1 group: [[mnt/bigdata/tpch/sf10-parquet/nation.parquet/part-0.parquet]]}, predicate=n_name@1 = KENYA, pruning_predicate=n_name_min@0 <= KENYA AND KENYA <= n_name_max@1, projection=[n_nationkey, n_name] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "ps_suppkey", index: 0 }], 4), input_partitions=4 - ProjectionExec: expr=[ps_suppkey@0 as ps_suppkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: CAST(ps_availqty@1 AS Float64) > __value@2 - ProjectionExec: expr=[ps_suppkey@1 as ps_suppkey, ps_availqty@2 as ps_availqty, __value@5 as __value] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "ps_partkey", index: 0 }, Column { name: "l_partkey", index: 0 }), (Column { name: "ps_suppkey", index: 1 }, Column { name: "l_suppkey", index: 1 })] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "ps_partkey", index: 0 }, Column { name: "ps_suppkey", index: 1 }], 4), input_partitions=4 - 
CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=LeftSemi, on=[(Column { name: "ps_partkey", index: 0 }, Column { name: "p_partkey", index: 0 })] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 4), input_partitions=4 - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-3.parquet]]}, projection=[ps_partkey, ps_suppkey, ps_availqty] - CoalesceBatchesExec: 
target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4), input_partitions=4 - ProjectionExec: expr=[p_partkey@0 as p_partkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: p_name@1 LIKE blanched% - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/part.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/part.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/part.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/part.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-3.parquet]]}, predicate=p_name@1 LIKE blanched%, projection=[p_partkey, p_name] - ProjectionExec: expr=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey, 0.5 * CAST(SUM(lineitem.l_quantity)@2 AS Float64) as __value] - AggregateExec: mode=FinalPartitioned, 
gby=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey], aggr=[SUM(lineitem.l_quantity)] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "l_partkey", index: 0 }, Column { name: "l_suppkey", index: 1 }], 4), input_partitions=4 - AggregateExec: mode=Partial, gby=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey], aggr=[SUM(lineitem.l_quantity)] - ProjectionExec: expr=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey, l_quantity@2 as l_quantity] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_shipdate@3 >= 8401 AND l_shipdate@3 < 8766 - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-8.parquet, 
mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-3.parquet]]}, predicate=l_shipdate@10 >= 8401 AND l_shipdate@10 < 8766, pruning_predicate=l_shipdate_max@0 >= 8401 AND l_shipdate_min@1 < 8766, projection=[l_partkey, l_suppkey, l_quantity, l_shipdate] + FilterExec: l_shipdate@3 >= 1993-01-01 AND l_shipdate@3 < 1994-01-01 + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_partkey, l_suppkey, l_quantity, l_shipdate], predicate=l_shipdate@10 >= 1993-01-01 AND l_shipdate@10 < 1994-01-01, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1993-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1994-01-01 END, required_guarantees=[] RaySQL Plan =========== -Query Stage #0 (4 -> 4): -ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "s_nationkey", index: 3 }], 4)) - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-6.parquet, 
mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-3.parquet]]}, projection=[s_suppkey, s_name, s_address, s_nationkey] - -Query Stage #1 (1 -> 4): -ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) +Query Stage #0 (1 -> 4): +ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) ProjectionExec: expr=[n_nationkey@0 as n_nationkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: n_name@1 = KENYA - ParquetExec: limit=None, partitions={1 group: [[mnt/bigdata/tpch/sf10-parquet/nation.parquet/part-0.parquet]]}, predicate=n_name@1 = KENYA, pruning_predicate=n_name_min@0 <= KENYA AND KENYA <= n_name_max@1, projection=[n_nationkey, n_name] + ParquetExec: file_groups={1 group: 
[[home/edmondo/Development/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = KENYA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= KENYA AND KENYA <= n_name_max@1 END, required_guarantees=[n_name in (KENYA)] + +Query Stage #1 (2 -> 4): +ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "s_nationkey", index: 3 }], 4)) + ParquetExec: file_groups={2 groups: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet/part1.parquet], [home/edmondo/Development/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_name, s_address, s_nationkey] Query Stage #2 (4 -> 4): ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) - ProjectionExec: expr=[s_suppkey@0 as s_suppkey, s_name@1 as s_name, s_address@2 as s_address] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "s_nationkey", index: 3 }, Column { name: "n_nationkey", index: 0 })] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "s_nationkey", index: 3 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) - -Query Stage #3 (4 -> 4): -ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 4)) - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-14.parquet], 
[mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-3.parquet]]}, projection=[ps_partkey, ps_suppkey, ps_availqty] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@3)], projection=[s_suppkey@1, s_name@2, s_address@3] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "s_nationkey", index: 3 }], 4)) -Query Stage #4 (4 -> 4): -ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) +Query Stage #3 (2 -> 4): +ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) ProjectionExec: expr=[p_partkey@0 as p_partkey] 
CoalesceBatchesExec: target_batch_size=8192 FilterExec: p_name@1 LIKE blanched% - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/part.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/part.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/part.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/part.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-3.parquet]]}, predicate=p_name@1 LIKE blanched%, projection=[p_partkey, p_name] + ParquetExec: file_groups={2 groups: [[home/edmondo/Development/datafusion-ray/data/part.parquet/part1.parquet], [home/edmondo/Development/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_name], predicate=p_name@1 LIKE blanched% + +Query Stage #4 (4 -> 4): +ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: 
"ps_partkey", index: 0 }], 4)) + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10449977], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part1.parquet:10449977..20896603, home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:0..3351], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:3351..10453328], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:10453328..20903305]]}, projection=[ps_partkey, ps_suppkey, ps_availqty] Query Stage #5 (4 -> 4): ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "ps_partkey", index: 0 }, Column { name: "ps_suppkey", index: 1 }], 4)) CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=LeftSemi, on=[(Column { name: "ps_partkey", index: 0 }, Column { name: "p_partkey", index: 0 })] + HashJoinExec: mode=Partitioned, join_type=RightSemi, on=[(p_partkey@0, ps_partkey@0)] CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 4)) + ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=4, input_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) + ShuffleReaderExec(stage_id=4, input_partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 4)) Query Stage #6 (4 -> 4): ShuffleWriterExec(stage_id=6, output_partitioning=Hash([Column { name: "l_partkey", index: 0 }, Column { name: "l_suppkey", index: 1 }], 4)) - AggregateExec: mode=Partial, gby=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey], aggr=[SUM(lineitem.l_quantity)] + AggregateExec: mode=Partial, gby=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey], aggr=[sum(lineitem.l_quantity)] ProjectionExec: expr=[l_partkey@0 as l_partkey, l_suppkey@1 as 
l_suppkey, l_quantity@2 as l_quantity] CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_shipdate@3 >= 8401 AND l_shipdate@3 < 8766 - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-3.parquet]]}, predicate=l_shipdate@10 >= 8401 AND l_shipdate@10 < 8766, pruning_predicate=l_shipdate_max@0 >= 8401 AND l_shipdate_min@1 < 8766, projection=[l_partkey, l_suppkey, l_quantity, l_shipdate] + FilterExec: l_shipdate@3 >= 1993-01-01 AND l_shipdate@3 < 1994-01-01 + 
ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_partkey, l_suppkey, l_quantity, l_shipdate], predicate=l_shipdate@10 >= 1993-01-01 AND l_shipdate@10 < 1994-01-01, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1993-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1994-01-01 END, required_guarantees=[] Query Stage #7 (4 -> 4): ShuffleWriterExec(stage_id=7, output_partitioning=Hash([Column { name: "ps_suppkey", index: 0 }], 4)) - ProjectionExec: expr=[ps_suppkey@0 as ps_suppkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: CAST(ps_availqty@1 AS Float64) > __value@2 - ProjectionExec: expr=[ps_suppkey@1 as ps_suppkey, ps_availqty@2 as ps_availqty, __value@5 as __value] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(ps_partkey@0, l_partkey@1), (ps_suppkey@1, l_suppkey@2)], filter=CAST(ps_availqty@0 AS Float64) > Float64(0.5) * sum(lineitem.l_quantity)@1, projection=[ps_suppkey@1] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=5, input_partitioning=Hash([Column { name: "ps_partkey", index: 0 }, Column { name: "ps_suppkey", index: 1 }], 4)) + ProjectionExec: expr=[0.5 * CAST(sum(lineitem.l_quantity)@2 AS Float64) as Float64(0.5) * sum(lineitem.l_quantity), l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey] + AggregateExec: mode=FinalPartitioned, gby=[l_partkey@0 as l_partkey, 
l_suppkey@1 as l_suppkey], aggr=[sum(lineitem.l_quantity)] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "ps_partkey", index: 0 }, Column { name: "l_partkey", index: 0 }), (Column { name: "ps_suppkey", index: 1 }, Column { name: "l_suppkey", index: 1 })] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=5, input_partitioning=Hash([Column { name: "ps_partkey", index: 0 }, Column { name: "ps_suppkey", index: 1 }], 4)) - ProjectionExec: expr=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey, 0.5 * CAST(SUM(lineitem.l_quantity)@2 AS Float64) as __value] - AggregateExec: mode=FinalPartitioned, gby=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey], aggr=[SUM(lineitem.l_quantity)] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=6, input_partitioning=Hash([Column { name: "l_partkey", index: 0 }, Column { name: "l_suppkey", index: 1 }], 4)) + ShuffleReaderExec(stage_id=6, input_partitioning=Hash([Column { name: "l_partkey", index: 0 }, Column { name: "l_suppkey", index: 1 }], 4)) -Query Stage #8 (4 -> 4): +Query Stage #8 (4 -> 1): ShuffleWriterExec(stage_id=8, output_partitioning=Hash([], 4)) - SortExec: expr=[s_name@0 ASC NULLS LAST] - ProjectionExec: expr=[s_name@1 as s_name, s_address@2 as s_address] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=LeftSemi, on=[(Column { name: "s_suppkey", index: 0 }, Column { name: "ps_suppkey", index: 0 })] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=7, input_partitioning=Hash([Column { name: "ps_suppkey", index: 0 }], 4)) + SortExec: expr=[s_name@0 ASC NULLS LAST], preserve_partitioning=[true] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=LeftSemi, 
on=[(s_suppkey@0, ps_suppkey@0)], projection=[s_name@1, s_address@2] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=7, input_partitioning=Hash([Column { name: "ps_suppkey", index: 0 }], 4)) Query Stage #9 (4 -> 1): SortPreservingMergeExec: [s_name@0 ASC NULLS LAST] diff --git a/testdata/expected-plans/q21.txt b/testdata/expected-plans/q21.txt index 557f95f..c2390e8 100644 --- a/testdata/expected-plans/q21.txt +++ b/testdata/expected-plans/q21.txt @@ -3,11 +3,11 @@ DataFusion Logical Plan Limit: skip=0, fetch=100 Sort: numwait DESC NULLS FIRST, supplier.s_name ASC NULLS LAST, fetch=100 - Projection: supplier.s_name, COUNT(UInt8(1)) AS numwait - Aggregate: groupBy=[[supplier.s_name]], aggr=[[COUNT(UInt8(1))]] + Projection: supplier.s_name, count(*) AS numwait + Aggregate: groupBy=[[supplier.s_name]], aggr=[[count(Int64(1)) AS count(*)]] Projection: supplier.s_name - LeftAnti Join: l1.l_orderkey = l3.l_orderkey Filter: l3.l_suppkey != l1.l_suppkey - LeftSemi Join: l1.l_orderkey = l2.l_orderkey Filter: l2.l_suppkey != l1.l_suppkey + LeftAnti Join: l1.l_orderkey = __correlated_sq_2.l_orderkey Filter: __correlated_sq_2.l_suppkey != l1.l_suppkey + LeftSemi Join: l1.l_orderkey = __correlated_sq_1.l_orderkey Filter: __correlated_sq_1.l_suppkey != l1.l_suppkey Projection: supplier.s_name, l1.l_orderkey, l1.l_suppkey Inner Join: supplier.s_nationkey = nation.n_nationkey Projection: supplier.s_name, supplier.s_nationkey, l1.l_orderkey, l1.l_suppkey @@ -25,170 +25,165 @@ Limit: skip=0, fetch=100 Projection: nation.n_nationkey Filter: nation.n_name = Utf8("ARGENTINA") TableScan: nation projection=[n_nationkey, n_name], partial_filters=[nation.n_name = Utf8("ARGENTINA")] - SubqueryAlias: l2 - TableScan: lineitem projection=[l_orderkey, l_suppkey] - SubqueryAlias: l3 - Projection: lineitem.l_orderkey, 
lineitem.l_suppkey - Filter: lineitem.l_receiptdate > lineitem.l_commitdate - TableScan: lineitem projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], partial_filters=[lineitem.l_receiptdate > lineitem.l_commitdate] + SubqueryAlias: __correlated_sq_1 + SubqueryAlias: l2 + TableScan: lineitem projection=[l_orderkey, l_suppkey] + SubqueryAlias: __correlated_sq_2 + SubqueryAlias: l3 + Projection: lineitem.l_orderkey, lineitem.l_suppkey + Filter: lineitem.l_receiptdate > lineitem.l_commitdate + TableScan: lineitem projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], partial_filters=[lineitem.l_receiptdate > lineitem.l_commitdate] DataFusion Physical Plan ======================== GlobalLimitExec: skip=0, fetch=100 - SortPreservingMergeExec: [numwait@1 DESC,s_name@0 ASC NULLS LAST] - SortExec: fetch=100, expr=[numwait@1 DESC,s_name@0 ASC NULLS LAST] - ProjectionExec: expr=[s_name@0 as s_name, COUNT(UInt8(1))@1 as numwait] - AggregateExec: mode=FinalPartitioned, gby=[s_name@0 as s_name], aggr=[COUNT(UInt8(1))] + SortPreservingMergeExec: [numwait@1 DESC,s_name@0 ASC NULLS LAST], fetch=100 + SortExec: TopK(fetch=100), expr=[numwait@1 DESC,s_name@0 ASC NULLS LAST], preserve_partitioning=[true] + ProjectionExec: expr=[s_name@0 as s_name, count(*)@1 as numwait] + AggregateExec: mode=FinalPartitioned, gby=[s_name@0 as s_name], aggr=[count(*)] CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "s_name", index: 0 }], 4), input_partitions=4 - AggregateExec: mode=Partial, gby=[s_name@0 as s_name], aggr=[COUNT(UInt8(1))] - ProjectionExec: expr=[s_name@0 as s_name] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=LeftAnti, on=[(Column { name: "l_orderkey", index: 1 }, Column { name: "l_orderkey", index: 0 })], filter=BinaryExpr { left: Column { name: "l_suppkey", index: 1 }, op: NotEq, right: Column { name: "l_suppkey", index: 0 } } - CoalesceBatchesExec: 
target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=LeftSemi, on=[(Column { name: "l_orderkey", index: 1 }, Column { name: "l_orderkey", index: 0 })], filter=BinaryExpr { left: Column { name: "l_suppkey", index: 1 }, op: NotEq, right: Column { name: "l_suppkey", index: 0 } } - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "l_orderkey", index: 1 }], 4), input_partitions=4 - ProjectionExec: expr=[s_name@0 as s_name, l_orderkey@2 as l_orderkey, l_suppkey@3 as l_suppkey] + RepartitionExec: partitioning=Hash([s_name@0], 4), input_partitions=4 + AggregateExec: mode=Partial, gby=[s_name@0 as s_name], aggr=[count(*)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=LeftAnti, on=[(l_orderkey@1, l_orderkey@0)], filter=l_suppkey@1 != l_suppkey@0, projection=[s_name@0] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=LeftSemi, on=[(l_orderkey@1, l_orderkey@0)], filter=l_suppkey@1 != l_suppkey@0 + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([l_orderkey@1], 4), input_partitions=4 + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@1)], projection=[s_name@1, l_orderkey@3, l_suppkey@4] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=4 + RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 + ProjectionExec: expr=[n_nationkey@0 as n_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: n_name@1 = ARGENTINA + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ARGENTINA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ARGENTINA AND ARGENTINA <= 
n_name_max@1 END, required_guarantees=[n_name in (ARGENTINA)] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "s_nationkey", index: 1 }, Column { name: "n_nationkey", index: 0 })] + RepartitionExec: partitioning=Hash([s_nationkey@1], 4), input_partitions=4 CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "s_nationkey", index: 1 }], 4), input_partitions=4 - ProjectionExec: expr=[s_name@0 as s_name, s_nationkey@1 as s_nationkey, l_orderkey@2 as l_orderkey, l_suppkey@3 as l_suppkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "l_orderkey", index: 2 }, Column { name: "o_orderkey", index: 0 })] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@0, l_orderkey@2)], projection=[s_name@1, s_nationkey@2, l_orderkey@3, l_suppkey@4] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([o_orderkey@0], 4), input_partitions=4 + ProjectionExec: expr=[o_orderkey@0 as o_orderkey] CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "l_orderkey", index: 2 }], 4), input_partitions=4 - ProjectionExec: expr=[s_name@1 as s_name, s_nationkey@2 as s_nationkey, l_orderkey@3 as l_orderkey, l_suppkey@4 as l_suppkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "s_suppkey", index: 0 }, Column { name: "l_suppkey", index: 1 })] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4), input_partitions=4 - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-11.parquet, 
mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-3.parquet]]}, projection=[s_suppkey, s_name, s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "l_suppkey", index: 1 }], 4), input_partitions=4 - ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_suppkey@1 as l_suppkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_receiptdate@3 > l_commitdate@2 - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-11.parquet, 
mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-3.parquet]]}, predicate=l_receiptdate@12 > l_commitdate@11, projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4), input_partitions=4 - ProjectionExec: expr=[o_orderkey@0 as o_orderkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: o_orderstatus@1 = F - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-11.parquet, 
mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-3.parquet]]}, predicate=o_orderstatus@2 = F, pruning_predicate=o_orderstatus_min@0 <= F AND F <= o_orderstatus_max@1, projection=[o_orderkey, o_orderstatus] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4), input_partitions=4 - RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 - ProjectionExec: expr=[n_nationkey@0 as n_nationkey] + FilterExec: o_orderstatus@1 = F + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], 
[home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_orderstatus], predicate=o_orderstatus@2 = F, pruning_predicate=CASE WHEN o_orderstatus_null_count@2 = o_orderstatus_row_count@3 THEN false ELSE o_orderstatus_min@0 <= F AND F <= o_orderstatus_max@1 END, required_guarantees=[o_orderstatus in (F)] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([l_orderkey@2], 4), input_partitions=4 CoalesceBatchesExec: target_batch_size=8192 - FilterExec: n_name@1 = ARGENTINA - ParquetExec: limit=None, partitions={1 group: [[mnt/bigdata/tpch/sf10-parquet/nation.parquet/part-0.parquet]]}, predicate=n_name@1 = ARGENTINA, pruning_predicate=n_name_min@0 <= ARGENTINA AND ARGENTINA <= n_name_max@1, projection=[n_nationkey, n_name] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1)], projection=[s_name@1, s_nationkey@2, l_orderkey@3, l_suppkey@4] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=4 + RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 + ParquetExec: file_groups={2 groups: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet/part1.parquet], [home/edmondo/Development/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_name, s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([l_suppkey@1], 4), input_partitions=4 + ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_suppkey@1 as l_suppkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: l_receiptdate@3 > l_commitdate@2 + ParquetExec: file_groups={4 groups: 
[[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4 + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_suppkey] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4 + ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_suppkey@1 as l_suppkey] CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4), input_partitions=4 - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-6.parquet, 
mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-3.parquet]]}, projection=[l_orderkey, l_suppkey] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4), input_partitions=4 - ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_suppkey@1 as l_suppkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_receiptdate@3 > l_commitdate@2 - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-6.parquet, 
mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-3.parquet]]}, predicate=l_receiptdate@12 > l_commitdate@11, projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate] + FilterExec: l_receiptdate@3 > l_commitdate@2 + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 
RaySQL Plan =========== -Query Stage #0 (4 -> 4): -ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-3.parquet]]}, projection=[s_suppkey, s_name, s_nationkey] +Query Stage #0 (1 -> 4): +ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) + ProjectionExec: expr=[n_nationkey@0 as n_nationkey] + CoalesceBatchesExec: 
target_batch_size=8192 + FilterExec: n_name@1 = ARGENTINA + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ARGENTINA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ARGENTINA AND ARGENTINA <= n_name_max@1 END, required_guarantees=[n_name in (ARGENTINA)] Query Stage #1 (4 -> 4): -ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "l_suppkey", index: 1 }], 4)) - ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_suppkey@1 as l_suppkey] +ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) + ProjectionExec: expr=[o_orderkey@0 as o_orderkey] CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_receiptdate@3 > l_commitdate@2 - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-17.parquet, 
mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-3.parquet]]}, predicate=l_receiptdate@12 > l_commitdate@11, projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate] + FilterExec: o_orderstatus@1 = F + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_orderstatus], predicate=o_orderstatus@2 = F, pruning_predicate=CASE WHEN o_orderstatus_null_count@2 = o_orderstatus_row_count@3 THEN false ELSE o_orderstatus_min@0 <= F AND F <= o_orderstatus_max@1 END, required_guarantees=[o_orderstatus in (F)] -Query Stage #2 (4 -> 4): -ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "l_orderkey", index: 2 }], 4)) - ProjectionExec: expr=[s_name@1 as s_name, s_nationkey@2 as s_nationkey, l_orderkey@3 as l_orderkey, l_suppkey@4 as l_suppkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "s_suppkey", index: 0 }, Column { name: "l_suppkey", index: 1 })] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: 
"s_suppkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "l_suppkey", index: 1 }], 4)) +Query Stage #2 (2 -> 4): +ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) + ParquetExec: file_groups={2 groups: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet/part1.parquet], [home/edmondo/Development/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_name, s_nationkey] Query Stage #3 (4 -> 4): -ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) - ProjectionExec: expr=[o_orderkey@0 as o_orderkey] +ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "l_suppkey", index: 1 }], 4)) + ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_suppkey@1 as l_suppkey] CoalesceBatchesExec: target_batch_size=8192 - FilterExec: o_orderstatus@1 = F - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-4.parquet, 
mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-3.parquet]]}, predicate=o_orderstatus@2 = F, pruning_predicate=o_orderstatus_min@0 <= F AND F <= o_orderstatus_max@1, projection=[o_orderkey, o_orderstatus] + FilterExec: l_receiptdate@3 > l_commitdate@2 + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 Query Stage #4 (4 -> 4): -ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "s_nationkey", index: 1 }], 4)) - ProjectionExec: expr=[s_name@0 as s_name, s_nationkey@1 as s_nationkey, l_orderkey@2 as l_orderkey, l_suppkey@3 as l_suppkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "l_orderkey", index: 2 }, Column { name: "o_orderkey", index: 0 })] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "l_orderkey", index: 2 }], 4)) - CoalesceBatchesExec: 
target_batch_size=8192 - ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) +ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "l_orderkey", index: 2 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1)], projection=[s_name@1, s_nationkey@2, l_orderkey@3, l_suppkey@4] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "l_suppkey", index: 1 }], 4)) -Query Stage #5 (1 -> 4): -ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) - ProjectionExec: expr=[n_nationkey@0 as n_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: n_name@1 = ARGENTINA - ParquetExec: limit=None, partitions={1 group: [[mnt/bigdata/tpch/sf10-parquet/nation.parquet/part-0.parquet]]}, predicate=n_name@1 = ARGENTINA, pruning_predicate=n_name_min@0 <= ARGENTINA AND ARGENTINA <= n_name_max@1, projection=[n_nationkey, n_name] +Query Stage #5 (4 -> 4): +ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "s_nationkey", index: 1 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@0, l_orderkey@2)], projection=[s_name@1, s_nationkey@2, l_orderkey@3, l_suppkey@4] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=4, input_partitioning=Hash([Column { name: "l_orderkey", index: 2 }], 4)) Query Stage #6 (4 -> 4): ShuffleWriterExec(stage_id=6, output_partitioning=Hash([Column { name: "l_orderkey", index: 1 }], 4)) - 
ProjectionExec: expr=[s_name@0 as s_name, l_orderkey@2 as l_orderkey, l_suppkey@3 as l_suppkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "s_nationkey", index: 1 }, Column { name: "n_nationkey", index: 0 })] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=4, input_partitioning=Hash([Column { name: "s_nationkey", index: 1 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=5, input_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@1)], projection=[s_name@1, l_orderkey@3, l_suppkey@4] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=5, input_partitioning=Hash([Column { name: "s_nationkey", index: 1 }], 4)) Query Stage #7 (4 -> 4): ShuffleWriterExec(stage_id=7, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-0.parquet, 
mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-3.parquet]]}, projection=[l_orderkey, l_suppkey] + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_suppkey] Query Stage #8 (4 -> 4): ShuffleWriterExec(stage_id=8, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_suppkey@1 as l_suppkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_receiptdate@3 > l_commitdate@2 - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-11.parquet, 
mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-3.parquet]]}, predicate=l_receiptdate@12 > l_commitdate@11, projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate] + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_suppkey, 
l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 Query Stage #9 (4 -> 4): ShuffleWriterExec(stage_id=9, output_partitioning=Hash([Column { name: "s_name", index: 0 }], 4)) - AggregateExec: mode=Partial, gby=[s_name@0 as s_name], aggr=[COUNT(UInt8(1))] - ProjectionExec: expr=[s_name@0 as s_name] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=LeftAnti, on=[(Column { name: "l_orderkey", index: 1 }, Column { name: "l_orderkey", index: 0 })], filter=BinaryExpr { left: Column { name: "l_suppkey", index: 1 }, op: NotEq, right: Column { name: "l_suppkey", index: 0 } } - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=LeftSemi, on=[(Column { name: "l_orderkey", index: 1 }, Column { name: "l_orderkey", index: 0 })], filter=BinaryExpr { left: Column { name: "l_suppkey", index: 1 }, op: NotEq, right: Column { name: "l_suppkey", index: 0 } } - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=6, input_partitioning=Hash([Column { name: "l_orderkey", index: 1 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=7, input_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=8, input_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) + AggregateExec: mode=Partial, gby=[s_name@0 as s_name], aggr=[count(*)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=LeftAnti, on=[(l_orderkey@1, l_orderkey@0)], filter=l_suppkey@1 != l_suppkey@0, projection=[s_name@0] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=LeftSemi, on=[(l_orderkey@1, l_orderkey@0)], filter=l_suppkey@1 != l_suppkey@0 + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=6, input_partitioning=Hash([Column { name: "l_orderkey", index: 1 }], 4)) + 
CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=7, input_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=8, input_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) Query Stage #10 (4 -> 4): ShuffleWriterExec(stage_id=10, output_partitioning=Hash([Column { name: "s_name", index: 0 }], 4)) - SortExec: fetch=100, expr=[numwait@1 DESC,s_name@0 ASC NULLS LAST] - ProjectionExec: expr=[s_name@0 as s_name, COUNT(UInt8(1))@1 as numwait] - AggregateExec: mode=FinalPartitioned, gby=[s_name@0 as s_name], aggr=[COUNT(UInt8(1))] + SortExec: TopK(fetch=100), expr=[numwait@1 DESC,s_name@0 ASC NULLS LAST], preserve_partitioning=[true] + ProjectionExec: expr=[s_name@0 as s_name, count(*)@1 as numwait] + AggregateExec: mode=FinalPartitioned, gby=[s_name@0 as s_name], aggr=[count(*)] CoalesceBatchesExec: target_batch_size=8192 ShuffleReaderExec(stage_id=9, input_partitioning=Hash([Column { name: "s_name", index: 0 }], 4)) Query Stage #11 (1 -> 1): GlobalLimitExec: skip=0, fetch=100 - SortPreservingMergeExec: [numwait@1 DESC,s_name@0 ASC NULLS LAST] + SortPreservingMergeExec: [numwait@1 DESC,s_name@0 ASC NULLS LAST], fetch=100 ShuffleReaderExec(stage_id=10, input_partitioning=Hash([Column { name: "s_name", index: 0 }], 4)) diff --git a/testdata/expected-plans/q22.txt b/testdata/expected-plans/q22.txt index cdc613d..b091c31 100644 --- a/testdata/expected-plans/q22.txt +++ b/testdata/expected-plans/q22.txt @@ -2,113 +2,98 @@ DataFusion Logical Plan ======================= Sort: custsale.cntrycode ASC NULLS LAST - Projection: custsale.cntrycode, COUNT(UInt8(1)) AS numcust, SUM(custsale.c_acctbal) AS totacctbal - Aggregate: groupBy=[[custsale.cntrycode]], aggr=[[COUNT(UInt8(1)), SUM(custsale.c_acctbal)]] + Projection: custsale.cntrycode, count(*) AS numcust, sum(custsale.c_acctbal) AS totacctbal + Aggregate: groupBy=[[custsale.cntrycode]], 
aggr=[[count(Int64(1)) AS count(*), sum(custsale.c_acctbal)]] SubqueryAlias: custsale Projection: substr(customer.c_phone, Int64(1), Int64(2)) AS cntrycode, customer.c_acctbal - Filter: CAST(customer.c_acctbal AS Decimal128(15, 6)) > __scalar_sq_1.__value - CrossJoin: - Projection: customer.c_phone, customer.c_acctbal - LeftAnti Join: customer.c_custkey = orders.o_custkey - Filter: substr(customer.c_phone, Int64(1), Int64(2)) IN ([Utf8("24"), Utf8("34"), Utf8("16"), Utf8("30"), Utf8("33"), Utf8("14"), Utf8("13")]) - TableScan: customer projection=[c_custkey, c_phone, c_acctbal], partial_filters=[substr(customer.c_phone, Int64(1), Int64(2)) IN ([Utf8("24"), Utf8("34"), Utf8("16"), Utf8("30"), Utf8("33"), Utf8("14"), Utf8("13")])] + Inner Join: Filter: CAST(customer.c_acctbal AS Decimal128(15, 6)) > __scalar_sq_2.avg(customer.c_acctbal) + Projection: customer.c_phone, customer.c_acctbal + LeftAnti Join: customer.c_custkey = __correlated_sq_1.o_custkey + Filter: substr(customer.c_phone, Int64(1), Int64(2)) IN ([Utf8("24"), Utf8("34"), Utf8("16"), Utf8("30"), Utf8("33"), Utf8("14"), Utf8("13")]) + TableScan: customer projection=[c_custkey, c_phone, c_acctbal], partial_filters=[substr(customer.c_phone, Int64(1), Int64(2)) IN ([Utf8("24"), Utf8("34"), Utf8("16"), Utf8("30"), Utf8("33"), Utf8("14"), Utf8("13")])] + SubqueryAlias: __correlated_sq_1 TableScan: orders projection=[o_custkey] - SubqueryAlias: __scalar_sq_1 - Projection: AVG(customer.c_acctbal) AS __value - Aggregate: groupBy=[[]], aggr=[[AVG(customer.c_acctbal)]] - Projection: customer.c_acctbal - Filter: customer.c_acctbal > Decimal128(Some(0),11,2) AND substr(customer.c_phone, Int64(1), Int64(2)) IN ([Utf8("24"), Utf8("34"), Utf8("16"), Utf8("30"), Utf8("33"), Utf8("14"), Utf8("13")]) - TableScan: customer projection=[c_phone, c_acctbal], partial_filters=[customer.c_acctbal > Decimal128(Some(0),11,2) AS customer.c_acctbal > Decimal128(Some(0),30,15), substr(customer.c_phone, Int64(1), Int64(2)) IN 
([Utf8("24"), Utf8("34"), Utf8("16"), Utf8("30"), Utf8("33"), Utf8("14"), Utf8("13")]), customer.c_acctbal > Decimal128(Some(0),11,2)] + SubqueryAlias: __scalar_sq_2 + Aggregate: groupBy=[[]], aggr=[[avg(customer.c_acctbal)]] + Projection: customer.c_acctbal + Filter: customer.c_acctbal > Decimal128(Some(0),11,2) AND substr(customer.c_phone, Int64(1), Int64(2)) IN ([Utf8("24"), Utf8("34"), Utf8("16"), Utf8("30"), Utf8("33"), Utf8("14"), Utf8("13")]) + TableScan: customer projection=[c_phone, c_acctbal], partial_filters=[customer.c_acctbal > Decimal128(Some(0),11,2) AS customer.c_acctbal > Decimal128(Some(0),30,15), substr(customer.c_phone, Int64(1), Int64(2)) IN ([Utf8("24"), Utf8("34"), Utf8("16"), Utf8("30"), Utf8("33"), Utf8("14"), Utf8("13")]), customer.c_acctbal > Decimal128(Some(0),11,2)] DataFusion Physical Plan ======================== SortPreservingMergeExec: [cntrycode@0 ASC NULLS LAST] - SortExec: expr=[cntrycode@0 ASC NULLS LAST] - ProjectionExec: expr=[cntrycode@0 as cntrycode, COUNT(UInt8(1))@1 as numcust, SUM(custsale.c_acctbal)@2 as totacctbal] - AggregateExec: mode=FinalPartitioned, gby=[cntrycode@0 as cntrycode], aggr=[COUNT(UInt8(1)), SUM(custsale.c_acctbal)] + SortExec: expr=[cntrycode@0 ASC NULLS LAST], preserve_partitioning=[true] + ProjectionExec: expr=[cntrycode@0 as cntrycode, count(*)@1 as numcust, sum(custsale.c_acctbal)@2 as totacctbal] + AggregateExec: mode=FinalPartitioned, gby=[cntrycode@0 as cntrycode], aggr=[count(*), sum(custsale.c_acctbal)] CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "cntrycode", index: 0 }], 4), input_partitions=1 - AggregateExec: mode=Partial, gby=[cntrycode@0 as cntrycode], aggr=[COUNT(UInt8(1)), SUM(custsale.c_acctbal)] - ProjectionExec: expr=[substr(c_phone@0, 1, 2) as cntrycode, c_acctbal@1 as c_acctbal] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: CAST(c_acctbal@1 AS Decimal128(15, 6)) > __value@2 - CrossJoinExec - CoalescePartitionsExec - 
ProjectionExec: expr=[c_phone@1 as c_phone, c_acctbal@2 as c_acctbal] + RepartitionExec: partitioning=Hash([cntrycode@0], 4), input_partitions=4 + AggregateExec: mode=Partial, gby=[cntrycode@0 as cntrycode], aggr=[count(*), sum(custsale.c_acctbal)] + ProjectionExec: expr=[substr(c_phone@1, 1, 2) as cntrycode, c_acctbal@2 as c_acctbal] + NestedLoopJoinExec: join_type=Inner, filter=CAST(c_acctbal@0 AS Decimal128(15, 6)) > avg(customer.c_acctbal)@1 + AggregateExec: mode=Final, gby=[], aggr=[avg(customer.c_acctbal)] + CoalescePartitionsExec + AggregateExec: mode=Partial, gby=[], aggr=[avg(customer.c_acctbal)] + ProjectionExec: expr=[c_acctbal@1 as c_acctbal] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=LeftAnti, on=[(Column { name: "c_custkey", index: 0 }, Column { name: "o_custkey", index: 0 })] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4), input_partitions=4 - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: Use substr(c_phone@1, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-22.parquet, 
mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-3.parquet]]}, predicate=Use substr(c_phone@4, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]), projection=[c_custkey, c_phone, c_acctbal] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "o_custkey", index: 0 }], 4), input_partitions=4 - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-2.parquet, 
mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-3.parquet]]}, projection=[o_custkey] - ProjectionExec: expr=[AVG(customer.c_acctbal)@0 as __value] - AggregateExec: mode=Final, gby=[], aggr=[AVG(customer.c_acctbal)] - CoalescePartitionsExec - AggregateExec: mode=Partial, gby=[], aggr=[AVG(customer.c_acctbal)] - ProjectionExec: expr=[c_acctbal@1 as c_acctbal] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: c_acctbal@1 > Some(0),11,2 AND Use substr(c_phone@0, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-13.parquet, 
mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-3.parquet]]}, predicate=c_acctbal@5 > Some(0),11,2 AND Use substr(c_phone@4, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) AND c_acctbal@5 > Some(0),11,2, pruning_predicate=c_acctbal_max@0 > Some(0),11,2 AND c_acctbal_max@0 > Some(0),11,2, projection=[c_phone, c_acctbal] + FilterExec: c_acctbal@1 > Some(0),11,2 AND Use substr(c_phone@0, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, 
Literal { value: Utf8("13") }]) + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:0..3391370], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:3391370..6782740], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:6782740..6784675, home/edmondo/Development/datafusion-ray/data/customer.parquet/part2.parquet:0..3389435], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part2.parquet:3389435..6780803]]}, projection=[c_phone, c_acctbal], predicate=c_acctbal@5 > Some(0),11,2 AND Use substr(c_phone@4, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) AND c_acctbal@5 > Some(0),11,2, pruning_predicate=CASE WHEN c_acctbal_null_count@1 = c_acctbal_row_count@2 THEN false ELSE c_acctbal_max@0 > Some(0),11,2 END AND CASE WHEN c_acctbal_null_count@1 = c_acctbal_row_count@2 THEN false ELSE c_acctbal_max@0 > Some(0),11,2 END, required_guarantees=[] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=LeftAnti, on=[(c_custkey@0, o_custkey@0)], projection=[c_phone@1, c_acctbal@2] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([c_custkey@0], 4), input_partitions=4 + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: Use substr(c_phone@1, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:0..3391370], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:3391370..6782740], 
[home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:6782740..6784675, home/edmondo/Development/datafusion-ray/data/customer.parquet/part2.parquet:0..3389435], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part2.parquet:3389435..6780803]]}, projection=[c_custkey, c_phone, c_acctbal], predicate=Use substr(c_phone@4, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([o_custkey@0], 4), input_partitions=4 + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_custkey] RaySQL Plan =========== -Query Stage #0 (4 -> 4): -ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: Use substr(c_phone@1, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-11.parquet, 
mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-3.parquet]]}, predicate=Use substr(c_phone@4, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]), projection=[c_custkey, c_phone, c_acctbal] +Query Stage #0 (4 -> 1): +ShuffleWriterExec(stage_id=0, output_partitioning=UnknownPartitioning(4)) + AggregateExec: mode=Partial, gby=[], aggr=[avg(customer.c_acctbal)] + ProjectionExec: expr=[c_acctbal@1 as c_acctbal] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: c_acctbal@1 > Some(0),11,2 AND Use substr(c_phone@0, 1, 2) IN 
(SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:0..3391370], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:3391370..6782740], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:6782740..6784675, home/edmondo/Development/datafusion-ray/data/customer.parquet/part2.parquet:0..3389435], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part2.parquet:3389435..6780803]]}, projection=[c_phone, c_acctbal], predicate=c_acctbal@5 > Some(0),11,2 AND Use substr(c_phone@4, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) AND c_acctbal@5 > Some(0),11,2, pruning_predicate=CASE WHEN c_acctbal_null_count@1 = c_acctbal_row_count@2 THEN false ELSE c_acctbal_max@0 > Some(0),11,2 END AND CASE WHEN c_acctbal_null_count@1 = c_acctbal_row_count@2 THEN false ELSE c_acctbal_max@0 > Some(0),11,2 END, required_guarantees=[] Query Stage #1 (4 -> 4): -ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "o_custkey", index: 0 }], 4)) - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-20.parquet, 
mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-3.parquet]]}, projection=[o_custkey] +ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: Use substr(c_phone@1, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:0..3391370], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:3391370..6782740], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:6782740..6784675, home/edmondo/Development/datafusion-ray/data/customer.parquet/part2.parquet:0..3389435], 
[home/edmondo/Development/datafusion-ray/data/customer.parquet/part2.parquet:3389435..6780803]]}, projection=[c_custkey, c_phone, c_acctbal], predicate=Use substr(c_phone@4, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) Query Stage #2 (4 -> 4): -ShuffleWriterExec(stage_id=2, output_partitioning=Hash([], 4)) - ProjectionExec: expr=[c_phone@1 as c_phone, c_acctbal@2 as c_acctbal] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=LeftAnti, on=[(Column { name: "c_custkey", index: 0 }, Column { name: "o_custkey", index: 0 })] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "o_custkey", index: 0 }], 4)) +ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "o_custkey", index: 0 }], 4)) + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_custkey] -Query Stage #3 (4 -> 1): -ShuffleWriterExec(stage_id=3, output_partitioning=UnknownPartitioning(4)) - AggregateExec: mode=Partial, gby=[], aggr=[AVG(customer.c_acctbal)] - ProjectionExec: expr=[c_acctbal@1 as c_acctbal] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: c_acctbal@1 > Some(0),11,2 AND Use 
substr(c_phone@0, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-3.parquet]]}, predicate=c_acctbal@5 > Some(0),11,2 AND Use substr(c_phone@4, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: 
Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) AND c_acctbal@5 > Some(0),11,2, pruning_predicate=c_acctbal_max@0 > Some(0),11,2 AND c_acctbal_max@0 > Some(0),11,2, projection=[c_phone, c_acctbal] +Query Stage #3 (4 -> 4): +ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "cntrycode", index: 0 }], 4)) + AggregateExec: mode=Partial, gby=[cntrycode@0 as cntrycode], aggr=[count(*), sum(custsale.c_acctbal)] + ProjectionExec: expr=[substr(c_phone@1, 1, 2) as cntrycode, c_acctbal@2 as c_acctbal] + NestedLoopJoinExec: join_type=Inner, filter=CAST(c_acctbal@0 AS Decimal128(15, 6)) > avg(customer.c_acctbal)@1 + AggregateExec: mode=Final, gby=[], aggr=[avg(customer.c_acctbal)] + CoalescePartitionsExec + ShuffleReaderExec(stage_id=0, input_partitioning=UnknownPartitioning(4)) + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=LeftAnti, on=[(c_custkey@0, o_custkey@0)], projection=[c_phone@1, c_acctbal@2] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "o_custkey", index: 0 }], 4)) -Query Stage #4 (1 -> 4): +Query Stage #4 (4 -> 4): ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "cntrycode", index: 0 }], 4)) - AggregateExec: mode=Partial, gby=[cntrycode@0 as cntrycode], aggr=[COUNT(UInt8(1)), SUM(custsale.c_acctbal)] - ProjectionExec: expr=[substr(c_phone@0, 1, 2) as cntrycode, c_acctbal@1 as c_acctbal] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: CAST(c_acctbal@1 AS Decimal128(15, 6)) > __value@2 - CrossJoinExec - CoalescePartitionsExec - ShuffleReaderExec(stage_id=2, input_partitioning=Hash([], 4)) - ProjectionExec: expr=[AVG(customer.c_acctbal)@0 as __value] - 
AggregateExec: mode=Final, gby=[], aggr=[AVG(customer.c_acctbal)] - CoalescePartitionsExec - ShuffleReaderExec(stage_id=3, input_partitioning=UnknownPartitioning(4)) - -Query Stage #5 (4 -> 4): -ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "cntrycode", index: 0 }], 4)) - SortExec: expr=[cntrycode@0 ASC NULLS LAST] - ProjectionExec: expr=[cntrycode@0 as cntrycode, COUNT(UInt8(1))@1 as numcust, SUM(custsale.c_acctbal)@2 as totacctbal] - AggregateExec: mode=FinalPartitioned, gby=[cntrycode@0 as cntrycode], aggr=[COUNT(UInt8(1)), SUM(custsale.c_acctbal)] + SortExec: expr=[cntrycode@0 ASC NULLS LAST], preserve_partitioning=[true] + ProjectionExec: expr=[cntrycode@0 as cntrycode, count(*)@1 as numcust, sum(custsale.c_acctbal)@2 as totacctbal] + AggregateExec: mode=FinalPartitioned, gby=[cntrycode@0 as cntrycode], aggr=[count(*), sum(custsale.c_acctbal)] CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=4, input_partitioning=Hash([Column { name: "cntrycode", index: 0 }], 4)) + ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "cntrycode", index: 0 }], 4)) -Query Stage #6 (4 -> 1): +Query Stage #5 (4 -> 1): SortPreservingMergeExec: [cntrycode@0 ASC NULLS LAST] - ShuffleReaderExec(stage_id=5, input_partitioning=Hash([Column { name: "cntrycode", index: 0 }], 4)) + ShuffleReaderExec(stage_id=4, input_partitioning=Hash([Column { name: "cntrycode", index: 0 }], 4)) diff --git a/testdata/expected-plans/q3.txt b/testdata/expected-plans/q3.txt index e6f9749..b4cc717 100644 --- a/testdata/expected-plans/q3.txt +++ b/testdata/expected-plans/q3.txt @@ -3,8 +3,8 @@ DataFusion Logical Plan Limit: skip=0, fetch=10 Sort: revenue DESC NULLS FIRST, orders.o_orderdate ASC NULLS LAST, fetch=10 - Projection: lineitem.l_orderkey, SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS revenue, orders.o_orderdate, orders.o_shippriority - Aggregate: groupBy=[[lineitem.l_orderkey, orders.o_orderdate, 
orders.o_shippriority]], aggr=[[SUM(CAST(lineitem.l_extendedprice AS Decimal128(35, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(35, 4))) AS SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]] + Projection: lineitem.l_orderkey, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS revenue, orders.o_orderdate, orders.o_shippriority + Aggregate: groupBy=[[lineitem.l_orderkey, orders.o_orderdate, orders.o_shippriority]], aggr=[[sum(lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - lineitem.l_discount)) AS sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]] Projection: orders.o_orderdate, orders.o_shippriority, lineitem.l_orderkey, lineitem.l_extendedprice, lineitem.l_discount Inner Join: orders.o_orderkey = lineitem.l_orderkey Projection: orders.o_orderkey, orders.o_orderdate, orders.o_shippriority @@ -12,48 +12,46 @@ Limit: skip=0, fetch=10 Projection: customer.c_custkey Filter: customer.c_mktsegment = Utf8("BUILDING") TableScan: customer projection=[c_custkey, c_mktsegment], partial_filters=[customer.c_mktsegment = Utf8("BUILDING")] - Filter: orders.o_orderdate < Date32("9204") - TableScan: orders projection=[o_orderkey, o_custkey, o_orderdate, o_shippriority], partial_filters=[orders.o_orderdate < Date32("9204")] + Filter: orders.o_orderdate < Date32("1995-03-15") + TableScan: orders projection=[o_orderkey, o_custkey, o_orderdate, o_shippriority], partial_filters=[orders.o_orderdate < Date32("1995-03-15")] Projection: lineitem.l_orderkey, lineitem.l_extendedprice, lineitem.l_discount - Filter: lineitem.l_shipdate > Date32("9204") - TableScan: lineitem projection=[l_orderkey, l_extendedprice, l_discount, l_shipdate], partial_filters=[lineitem.l_shipdate > Date32("9204")] + Filter: lineitem.l_shipdate > Date32("1995-03-15") + TableScan: lineitem projection=[l_orderkey, l_extendedprice, l_discount, l_shipdate], partial_filters=[lineitem.l_shipdate > Date32("1995-03-15")] 
DataFusion Physical Plan ======================== GlobalLimitExec: skip=0, fetch=10 - SortPreservingMergeExec: [revenue@1 DESC,o_orderdate@2 ASC NULLS LAST] - SortExec: fetch=10, expr=[revenue@1 DESC,o_orderdate@2 ASC NULLS LAST] - ProjectionExec: expr=[l_orderkey@0 as l_orderkey, SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@3 as revenue, o_orderdate@1 as o_orderdate, o_shippriority@2 as o_shippriority] - AggregateExec: mode=FinalPartitioned, gby=[l_orderkey@0 as l_orderkey, o_orderdate@1 as o_orderdate, o_shippriority@2 as o_shippriority], aggr=[SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] + SortPreservingMergeExec: [revenue@1 DESC,o_orderdate@2 ASC NULLS LAST], fetch=10 + SortExec: TopK(fetch=10), expr=[revenue@1 DESC,o_orderdate@2 ASC NULLS LAST], preserve_partitioning=[true] + ProjectionExec: expr=[l_orderkey@0 as l_orderkey, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@3 as revenue, o_orderdate@1 as o_orderdate, o_shippriority@2 as o_shippriority] + AggregateExec: mode=FinalPartitioned, gby=[l_orderkey@0 as l_orderkey, o_orderdate@1 as o_orderdate, o_shippriority@2 as o_shippriority], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "l_orderkey", index: 0 }, Column { name: "o_orderdate", index: 1 }, Column { name: "o_shippriority", index: 2 }], 4), input_partitions=4 - AggregateExec: mode=Partial, gby=[l_orderkey@2 as l_orderkey, o_orderdate@0 as o_orderdate, o_shippriority@1 as o_shippriority], aggr=[SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] - ProjectionExec: expr=[o_orderdate@1 as o_orderdate, o_shippriority@2 as o_shippriority, l_orderkey@3 as l_orderkey, l_extendedprice@4 as l_extendedprice, l_discount@5 as l_discount] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "o_orderkey", index: 0 }, 
Column { name: "l_orderkey", index: 0 })] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4), input_partitions=4 - ProjectionExec: expr=[o_orderkey@1 as o_orderkey, o_orderdate@3 as o_orderdate, o_shippriority@4 as o_shippriority] + RepartitionExec: partitioning=Hash([l_orderkey@0, o_orderdate@1, o_shippriority@2], 4), input_partitions=4 + AggregateExec: mode=Partial, gby=[l_orderkey@2 as l_orderkey, o_orderdate@0 as o_orderdate, o_shippriority@1 as o_shippriority], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_orderdate@1, o_shippriority@2, l_orderkey@3, l_extendedprice@4, l_discount@5] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([o_orderkey@0], 4), input_partitions=4 + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[o_orderkey@1, o_orderdate@3, o_shippriority@4] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "c_custkey", index: 0 }, Column { name: "o_custkey", index: 1 })] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4), input_partitions=4 - ProjectionExec: expr=[c_custkey@0 as c_custkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: c_mktsegment@1 = BUILDING - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-6.parquet, 
mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-3.parquet]]}, predicate=c_mktsegment@6 = BUILDING, pruning_predicate=c_mktsegment_min@0 <= BUILDING AND BUILDING <= c_mktsegment_max@1, projection=[c_custkey, c_mktsegment] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "o_custkey", index: 1 }], 4), input_partitions=4 - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: o_orderdate@2 < 9204 - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-6.parquet, 
mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-3.parquet]]}, predicate=o_orderdate@4 < 9204, pruning_predicate=o_orderdate_min@0 < 9204, projection=[o_orderkey, o_custkey, o_orderdate, o_shippriority] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4), input_partitions=4 - ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] + RepartitionExec: partitioning=Hash([c_custkey@0], 4), input_partitions=4 + ProjectionExec: expr=[c_custkey@0 as c_custkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: c_mktsegment@1 = BUILDING + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:0..3391370], 
[home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:3391370..6782740], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:6782740..6784675, home/edmondo/Development/datafusion-ray/data/customer.parquet/part2.parquet:0..3389435], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part2.parquet:3389435..6780803]]}, projection=[c_custkey, c_mktsegment], predicate=c_mktsegment@6 = BUILDING, pruning_predicate=CASE WHEN c_mktsegment_null_count@2 = c_mktsegment_row_count@3 THEN false ELSE c_mktsegment_min@0 <= BUILDING AND BUILDING <= c_mktsegment_max@1 END, required_guarantees=[c_mktsegment in (BUILDING)] CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_shipdate@3 > 9204 - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-1.parquet], 
[mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-3.parquet]]}, predicate=l_shipdate@10 > 9204, pruning_predicate=l_shipdate_max@0 > 9204, projection=[l_orderkey, l_extendedprice, l_discount, l_shipdate] + RepartitionExec: partitioning=Hash([o_custkey@1], 4), input_partitions=4 + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: o_orderdate@2 < 1995-03-15 + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_custkey, o_orderdate, o_shippriority], predicate=o_orderdate@4 < 1995-03-15, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@0 < 1995-03-15 END, required_guarantees=[] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4 + ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: l_shipdate@3 > 1995-03-15 + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], 
[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 > 1995-03-15, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 > 1995-03-15 END, required_guarantees=[] RaySQL Plan =========== @@ -63,52 +61,50 @@ ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "c_custke ProjectionExec: expr=[c_custkey@0 as c_custkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: c_mktsegment@1 = BUILDING - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-17.parquet, 
mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-3.parquet]]}, predicate=c_mktsegment@6 = BUILDING, pruning_predicate=c_mktsegment_min@0 <= BUILDING AND BUILDING <= c_mktsegment_max@1, projection=[c_custkey, c_mktsegment] + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:0..3391370], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:3391370..6782740], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:6782740..6784675, home/edmondo/Development/datafusion-ray/data/customer.parquet/part2.parquet:0..3389435], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part2.parquet:3389435..6780803]]}, projection=[c_custkey, c_mktsegment], predicate=c_mktsegment@6 = BUILDING, pruning_predicate=CASE WHEN c_mktsegment_null_count@2 = c_mktsegment_row_count@3 THEN false ELSE c_mktsegment_min@0 <= BUILDING AND BUILDING <= c_mktsegment_max@1 END, required_guarantees=[c_mktsegment in (BUILDING)] Query Stage #1 (4 -> 4): ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "o_custkey", index: 1 }], 4)) CoalesceBatchesExec: target_batch_size=8192 - FilterExec: o_orderdate@2 < 9204 - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-13.parquet, 
mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-3.parquet]]}, predicate=o_orderdate@4 < 9204, pruning_predicate=o_orderdate_min@0 < 9204, projection=[o_orderkey, o_custkey, o_orderdate, o_shippriority] + FilterExec: o_orderdate@2 < 1995-03-15 + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_custkey, o_orderdate, o_shippriority], 
predicate=o_orderdate@4 < 1995-03-15, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@0 < 1995-03-15 END, required_guarantees=[] Query Stage #2 (4 -> 4): ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) - ProjectionExec: expr=[o_orderkey@1 as o_orderkey, o_orderdate@3 as o_orderdate, o_shippriority@4 as o_shippriority] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "c_custkey", index: 0 }, Column { name: "o_custkey", index: 1 })] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "o_custkey", index: 1 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[o_orderkey@1, o_orderdate@3, o_shippriority@4] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "o_custkey", index: 1 }], 4)) Query Stage #3 (4 -> 4): ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_shipdate@3 > 9204 - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-11.parquet, 
mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-3.parquet]]}, predicate=l_shipdate@10 > 9204, pruning_predicate=l_shipdate_max@0 > 9204, projection=[l_orderkey, l_extendedprice, l_discount, l_shipdate] + FilterExec: l_shipdate@3 > 1995-03-15 + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], 
[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 > 1995-03-15, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 > 1995-03-15 END, required_guarantees=[] Query Stage #4 (4 -> 4): ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }, Column { name: "o_orderdate", index: 1 }, Column { name: "o_shippriority", index: 2 }], 4)) - AggregateExec: mode=Partial, gby=[l_orderkey@2 as l_orderkey, o_orderdate@0 as o_orderdate, o_shippriority@1 as o_shippriority], aggr=[SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] - ProjectionExec: expr=[o_orderdate@1 as o_orderdate, o_shippriority@2 as o_shippriority, l_orderkey@3 as l_orderkey, l_extendedprice@4 as l_extendedprice, l_discount@5 as l_discount] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "o_orderkey", index: 0 }, Column { name: "l_orderkey", index: 0 })] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) + AggregateExec: mode=Partial, gby=[l_orderkey@2 as l_orderkey, o_orderdate@0 as o_orderdate, o_shippriority@1 as o_shippriority], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_orderdate@1, o_shippriority@2, l_orderkey@3, l_extendedprice@4, l_discount@5] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "o_orderkey", 
index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) Query Stage #5 (4 -> 4): ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }, Column { name: "o_orderdate", index: 2 }, Column { name: "o_shippriority", index: 3 }], 4)) - SortExec: fetch=10, expr=[revenue@1 DESC,o_orderdate@2 ASC NULLS LAST] - ProjectionExec: expr=[l_orderkey@0 as l_orderkey, SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@3 as revenue, o_orderdate@1 as o_orderdate, o_shippriority@2 as o_shippriority] - AggregateExec: mode=FinalPartitioned, gby=[l_orderkey@0 as l_orderkey, o_orderdate@1 as o_orderdate, o_shippriority@2 as o_shippriority], aggr=[SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] + SortExec: TopK(fetch=10), expr=[revenue@1 DESC,o_orderdate@2 ASC NULLS LAST], preserve_partitioning=[true] + ProjectionExec: expr=[l_orderkey@0 as l_orderkey, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@3 as revenue, o_orderdate@1 as o_orderdate, o_shippriority@2 as o_shippriority] + AggregateExec: mode=FinalPartitioned, gby=[l_orderkey@0 as l_orderkey, o_orderdate@1 as o_orderdate, o_shippriority@2 as o_shippriority], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] CoalesceBatchesExec: target_batch_size=8192 ShuffleReaderExec(stage_id=4, input_partitioning=Hash([Column { name: "l_orderkey", index: 0 }, Column { name: "o_orderdate", index: 1 }, Column { name: "o_shippriority", index: 2 }], 4)) Query Stage #6 (1 -> 1): GlobalLimitExec: skip=0, fetch=10 - SortPreservingMergeExec: [revenue@1 DESC,o_orderdate@2 ASC NULLS LAST] + SortPreservingMergeExec: [revenue@1 DESC,o_orderdate@2 ASC NULLS LAST], fetch=10 ShuffleReaderExec(stage_id=5, input_partitioning=Hash([Column { name: "l_orderkey", index: 0 }, Column { name: "o_orderdate", index: 2 }, Column { name: "o_shippriority", index: 3 
}], 4)) diff --git a/testdata/expected-plans/q4.txt b/testdata/expected-plans/q4.txt index 17a19e1..c15d906 100644 --- a/testdata/expected-plans/q4.txt +++ b/testdata/expected-plans/q4.txt @@ -2,42 +2,42 @@ DataFusion Logical Plan ======================= Sort: orders.o_orderpriority ASC NULLS LAST - Projection: orders.o_orderpriority, COUNT(UInt8(1)) AS order_count - Aggregate: groupBy=[[orders.o_orderpriority]], aggr=[[COUNT(UInt8(1))]] + Projection: orders.o_orderpriority, count(*) AS order_count + Aggregate: groupBy=[[orders.o_orderpriority]], aggr=[[count(Int64(1)) AS count(*)]] Projection: orders.o_orderpriority - LeftSemi Join: orders.o_orderkey = lineitem.l_orderkey + LeftSemi Join: orders.o_orderkey = __correlated_sq_1.l_orderkey Projection: orders.o_orderkey, orders.o_orderpriority - Filter: orders.o_orderdate >= Date32("9221") AND orders.o_orderdate < Date32("9312") - TableScan: orders projection=[o_orderkey, o_orderdate, o_orderpriority], partial_filters=[orders.o_orderdate >= Date32("9221"), orders.o_orderdate < Date32("9312")] - Projection: lineitem.l_orderkey - Filter: lineitem.l_commitdate < lineitem.l_receiptdate - TableScan: lineitem projection=[l_orderkey, l_commitdate, l_receiptdate], partial_filters=[lineitem.l_commitdate < lineitem.l_receiptdate] + Filter: orders.o_orderdate >= Date32("1995-04-01") AND orders.o_orderdate < Date32("1995-07-01") + TableScan: orders projection=[o_orderkey, o_orderdate, o_orderpriority], partial_filters=[orders.o_orderdate >= Date32("1995-04-01"), orders.o_orderdate < Date32("1995-07-01")] + SubqueryAlias: __correlated_sq_1 + Projection: lineitem.l_orderkey + Filter: lineitem.l_receiptdate > lineitem.l_commitdate + TableScan: lineitem projection=[l_orderkey, l_commitdate, l_receiptdate], partial_filters=[lineitem.l_receiptdate > lineitem.l_commitdate] DataFusion Physical Plan ======================== SortPreservingMergeExec: [o_orderpriority@0 ASC NULLS LAST] - SortExec: expr=[o_orderpriority@0 ASC NULLS LAST] - 
ProjectionExec: expr=[o_orderpriority@0 as o_orderpriority, COUNT(UInt8(1))@1 as order_count] - AggregateExec: mode=FinalPartitioned, gby=[o_orderpriority@0 as o_orderpriority], aggr=[COUNT(UInt8(1))] + SortExec: expr=[o_orderpriority@0 ASC NULLS LAST], preserve_partitioning=[true] + ProjectionExec: expr=[o_orderpriority@0 as o_orderpriority, count(*)@1 as order_count] + AggregateExec: mode=FinalPartitioned, gby=[o_orderpriority@0 as o_orderpriority], aggr=[count(*)] CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "o_orderpriority", index: 0 }], 4), input_partitions=4 - AggregateExec: mode=Partial, gby=[o_orderpriority@0 as o_orderpriority], aggr=[COUNT(UInt8(1))] - ProjectionExec: expr=[o_orderpriority@1 as o_orderpriority] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=LeftSemi, on=[(Column { name: "o_orderkey", index: 0 }, Column { name: "l_orderkey", index: 0 })] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4), input_partitions=4 - ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_orderpriority@2 as o_orderpriority] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: o_orderdate@1 >= 9221 AND o_orderdate@1 < 9312 - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-19.parquet, 
mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-3.parquet]]}, predicate=o_orderdate@4 >= 9221 AND o_orderdate@4 < 9312, pruning_predicate=o_orderdate_max@0 >= 9221 AND o_orderdate_min@1 < 9312, projection=[o_orderkey, o_orderdate, o_orderpriority] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4), input_partitions=4 - ProjectionExec: expr=[l_orderkey@0 as l_orderkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_commitdate@1 < l_receiptdate@2 - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-22.parquet, 
mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-3.parquet]]}, predicate=l_commitdate@11 < l_receiptdate@12, projection=[l_orderkey, l_commitdate, l_receiptdate] + RepartitionExec: partitioning=Hash([o_orderpriority@0], 4), input_partitions=4 + AggregateExec: mode=Partial, gby=[o_orderpriority@0 as o_orderpriority], aggr=[count(*)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=LeftSemi, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_orderpriority@1] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([o_orderkey@0], 4), input_partitions=4 + ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_orderpriority@2 as o_orderpriority] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: o_orderdate@1 >= 1995-04-01 AND o_orderdate@1 < 1995-07-01 + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], 
[home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_orderdate, o_orderpriority], predicate=o_orderdate@4 >= 1995-04-01 AND o_orderdate@4 < 1995-07-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1995-04-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1995-07-01 END, required_guarantees=[] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4 + ProjectionExec: expr=[l_orderkey@0 as l_orderkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: l_receiptdate@2 > l_commitdate@1 + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 RaySQL Plan =========== @@ -46,32 +46,31 @@ Query Stage #0 (4 -> 4): ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_orderpriority@2 as o_orderpriority] CoalesceBatchesExec: target_batch_size=8192 - FilterExec: o_orderdate@1 >= 9221 AND o_orderdate@1 < 9312 - ParquetExec: limit=None, partitions={4 groups: 
[[mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-3.parquet]]}, predicate=o_orderdate@4 >= 9221 AND o_orderdate@4 < 9312, pruning_predicate=o_orderdate_max@0 >= 9221 AND o_orderdate_min@1 < 9312, projection=[o_orderkey, o_orderdate, o_orderpriority] + FilterExec: o_orderdate@1 >= 1995-04-01 AND o_orderdate@1 < 1995-07-01 + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], 
[home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_orderdate, o_orderpriority], predicate=o_orderdate@4 >= 1995-04-01 AND o_orderdate@4 < 1995-07-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1995-04-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1995-07-01 END, required_guarantees=[] Query Stage #1 (4 -> 4): ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) ProjectionExec: expr=[l_orderkey@0 as l_orderkey] CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_commitdate@1 < l_receiptdate@2 - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-4.parquet, 
mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-3.parquet]]}, predicate=l_commitdate@11 < l_receiptdate@12, projection=[l_orderkey, l_commitdate, l_receiptdate] + FilterExec: l_receiptdate@2 > l_commitdate@1 + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 Query Stage #2 (4 -> 4): ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "o_orderpriority", index: 0 }], 4)) - AggregateExec: mode=Partial, gby=[o_orderpriority@0 as o_orderpriority], aggr=[COUNT(UInt8(1))] - ProjectionExec: expr=[o_orderpriority@1 as o_orderpriority] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=LeftSemi, on=[(Column { name: "o_orderkey", index: 0 }, Column { name: "l_orderkey", index: 0 })] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) - CoalesceBatchesExec: 
target_batch_size=8192 - ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) + AggregateExec: mode=Partial, gby=[o_orderpriority@0 as o_orderpriority], aggr=[count(*)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=LeftSemi, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_orderpriority@1] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) Query Stage #3 (4 -> 4): ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "o_orderpriority", index: 0 }], 4)) - SortExec: expr=[o_orderpriority@0 ASC NULLS LAST] - ProjectionExec: expr=[o_orderpriority@0 as o_orderpriority, COUNT(UInt8(1))@1 as order_count] - AggregateExec: mode=FinalPartitioned, gby=[o_orderpriority@0 as o_orderpriority], aggr=[COUNT(UInt8(1))] + SortExec: expr=[o_orderpriority@0 ASC NULLS LAST], preserve_partitioning=[true] + ProjectionExec: expr=[o_orderpriority@0 as o_orderpriority, count(*)@1 as order_count] + AggregateExec: mode=FinalPartitioned, gby=[o_orderpriority@0 as o_orderpriority], aggr=[count(*)] CoalesceBatchesExec: target_batch_size=8192 ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "o_orderpriority", index: 0 }], 4)) diff --git a/testdata/expected-plans/q5.txt b/testdata/expected-plans/q5.txt index ca7997e..bf23579 100644 --- a/testdata/expected-plans/q5.txt +++ b/testdata/expected-plans/q5.txt @@ -2,8 +2,8 @@ DataFusion Logical Plan ======================= Sort: revenue DESC NULLS FIRST - Projection: nation.n_name, SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS revenue - Aggregate: groupBy=[[nation.n_name]], aggr=[[SUM(CAST(lineitem.l_extendedprice AS Decimal128(35, 4)) * 
CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(35, 4))) AS SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]] + Projection: nation.n_name, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS revenue + Aggregate: groupBy=[[nation.n_name]], aggr=[[sum(lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - lineitem.l_discount)) AS sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]] Projection: lineitem.l_extendedprice, lineitem.l_discount, nation.n_name Inner Join: nation.n_regionkey = region.r_regionkey Projection: lineitem.l_extendedprice, lineitem.l_discount, nation.n_name, nation.n_regionkey @@ -16,8 +16,8 @@ Sort: revenue DESC NULLS FIRST Inner Join: customer.c_custkey = orders.o_custkey TableScan: customer projection=[c_custkey, c_nationkey] Projection: orders.o_orderkey, orders.o_custkey - Filter: orders.o_orderdate >= Date32("8766") AND orders.o_orderdate < Date32("9131") - TableScan: orders projection=[o_orderkey, o_custkey, o_orderdate], partial_filters=[orders.o_orderdate >= Date32("8766"), orders.o_orderdate < Date32("9131")] + Filter: orders.o_orderdate >= Date32("1994-01-01") AND orders.o_orderdate < Date32("1995-01-01") + TableScan: orders projection=[o_orderkey, o_custkey, o_orderdate], partial_filters=[orders.o_orderdate >= Date32("1994-01-01"), orders.o_orderdate < Date32("1995-01-01")] TableScan: lineitem projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount] TableScan: supplier projection=[s_suppkey, s_nationkey] TableScan: nation projection=[n_nationkey, n_name, n_regionkey] @@ -29,150 +29,145 @@ DataFusion Physical Plan ======================== SortPreservingMergeExec: [revenue@1 DESC] - SortExec: expr=[revenue@1 DESC] - ProjectionExec: expr=[n_name@0 as n_name, SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@1 as revenue] - AggregateExec: mode=FinalPartitioned, gby=[n_name@0 as n_name], aggr=[SUM(lineitem.l_extendedprice * Int64(1) - 
lineitem.l_discount)] + SortExec: expr=[revenue@1 DESC], preserve_partitioning=[true] + ProjectionExec: expr=[n_name@0 as n_name, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@1 as revenue] + AggregateExec: mode=FinalPartitioned, gby=[n_name@0 as n_name], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "n_name", index: 0 }], 4), input_partitions=4 - AggregateExec: mode=Partial, gby=[n_name@2 as n_name], aggr=[SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] - ProjectionExec: expr=[l_extendedprice@0 as l_extendedprice, l_discount@1 as l_discount, n_name@2 as n_name] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "n_regionkey", index: 3 }, Column { name: "r_regionkey", index: 0 })] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "n_regionkey", index: 3 }], 4), input_partitions=4 - ProjectionExec: expr=[l_extendedprice@0 as l_extendedprice, l_discount@1 as l_discount, n_name@4 as n_name, n_regionkey@5 as n_regionkey] + RepartitionExec: partitioning=Hash([n_name@0], 4), input_partitions=4 + AggregateExec: mode=Partial, gby=[n_name@2 as n_name], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(r_regionkey@0, n_regionkey@3)], projection=[l_extendedprice@1, l_discount@2, n_name@3] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([r_regionkey@0], 4), input_partitions=4 + RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 + ProjectionExec: expr=[r_regionkey@0 as r_regionkey] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "s_nationkey", index: 2 }, Column { name: 
"n_nationkey", index: 0 })] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "s_nationkey", index: 2 }], 4), input_partitions=4 - ProjectionExec: expr=[l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@5 as s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "l_suppkey", index: 1 }, Column { name: "s_suppkey", index: 0 }), (Column { name: "c_nationkey", index: 0 }, Column { name: "s_nationkey", index: 1 })] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "l_suppkey", index: 1 }, Column { name: "c_nationkey", index: 0 }], 4), input_partitions=4 - ProjectionExec: expr=[c_nationkey@0 as c_nationkey, l_suppkey@3 as l_suppkey, l_extendedprice@4 as l_extendedprice, l_discount@5 as l_discount] + FilterExec: r_name@1 = AFRICA + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/region.parquet/part1.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = AFRICA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= AFRICA AND AFRICA <= r_name_max@1 END, required_guarantees=[r_name in (AFRICA)] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([n_regionkey@3], 4), input_partitions=4 + ProjectionExec: expr=[l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, n_name@0 as n_name, n_regionkey@1 as n_regionkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[n_name@1, n_regionkey@2, l_extendedprice@3, l_discount@4] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=1 + ParquetExec: file_groups={1 group: 
[[home/edmondo/Development/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name, n_regionkey] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([s_nationkey@2], 4), input_partitions=4 + ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@0 as s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1), (s_nationkey@1, c_nationkey@0)], projection=[s_nationkey@1, l_extendedprice@4, l_discount@5] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([s_suppkey@0, s_nationkey@1], 4), input_partitions=4 + RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 + ParquetExec: file_groups={2 groups: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet/part1.parquet], [home/edmondo/Development/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([l_suppkey@1, c_nationkey@0], 4), input_partitions=4 + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@1, l_orderkey@0)], projection=[c_nationkey@0, l_suppkey@3, l_extendedprice@4, l_discount@5] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "o_orderkey", index: 1 }, Column { name: "l_orderkey", index: 0 })] + RepartitionExec: partitioning=Hash([o_orderkey@1], 4), input_partitions=4 CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "o_orderkey", index: 1 }], 4), input_partitions=4 - ProjectionExec: expr=[c_nationkey@1 as c_nationkey, o_orderkey@2 as o_orderkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "c_custkey", index: 
0 }, Column { name: "o_custkey", index: 1 })] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_nationkey@1, o_orderkey@2] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([c_custkey@0], 4), input_partitions=4 + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:0..3391370], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:3391370..6782740], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:6782740..6784675, home/edmondo/Development/datafusion-ray/data/customer.parquet/part2.parquet:0..3389435], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part2.parquet:3389435..6780803]]}, projection=[c_custkey, c_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([o_custkey@1], 4), input_partitions=4 + ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4), input_partitions=4 - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-16.parquet], 
[mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-3.parquet]]}, projection=[c_custkey, c_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "o_custkey", index: 1 }], 4), input_partitions=4 - ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: o_orderdate@2 >= 8766 AND o_orderdate@2 < 9131 - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-21.parquet, 
mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-3.parquet]]}, predicate=o_orderdate@4 >= 8766 AND o_orderdate@4 < 9131, pruning_predicate=o_orderdate_max@0 >= 8766 AND o_orderdate_min@1 < 9131, projection=[o_orderkey, o_custkey, o_orderdate] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4), input_partitions=4 - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-23.parquet, 
mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-3.parquet]]}, projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "s_suppkey", index: 0 }, Column { name: "s_nationkey", index: 1 }], 4), input_partitions=4 - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-17.parquet, 
mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-3.parquet]]}, projection=[s_suppkey, s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4), input_partitions=1 - ParquetExec: limit=None, partitions={1 group: [[mnt/bigdata/tpch/sf10-parquet/nation.parquet/part-0.parquet]]}, projection=[n_nationkey, n_name, n_regionkey] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "r_regionkey", index: 0 }], 4), input_partitions=4 - RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 - ProjectionExec: expr=[r_regionkey@0 as r_regionkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: r_name@1 = AFRICA - ParquetExec: limit=None, partitions={1 group: [[mnt/bigdata/tpch/sf10-parquet/region.parquet/part-0.parquet]]}, predicate=r_name@1 = AFRICA, pruning_predicate=r_name_min@0 <= AFRICA AND AFRICA <= r_name_max@1, projection=[r_regionkey, r_name] + FilterExec: o_orderdate@2 >= 1994-01-01 AND o_orderdate@2 < 1995-01-01 + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], 
[home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1994-01-01 AND o_orderdate@4 < 1995-01-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1994-01-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1995-01-01 END, required_guarantees=[] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4 + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount] RaySQL Plan =========== -Query Stage #0 (4 -> 4): -ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-22.parquet, 
mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-3.parquet]]}, projection=[c_custkey, c_nationkey] - -Query Stage #1 (4 -> 4): -ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "o_custkey", index: 1 }], 4)) - ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] +Query Stage #0 (1 -> 4): +ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "r_regionkey", index: 0 }], 4)) + ProjectionExec: expr=[r_regionkey@0 as r_regionkey] CoalesceBatchesExec: target_batch_size=8192 - FilterExec: o_orderdate@2 >= 8766 AND o_orderdate@2 < 9131 - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-20.parquet, 
mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-3.parquet]]}, predicate=o_orderdate@4 >= 8766 AND o_orderdate@4 < 9131, pruning_predicate=o_orderdate_max@0 >= 8766 AND o_orderdate_min@1 < 9131, projection=[o_orderkey, o_custkey, o_orderdate] + FilterExec: r_name@1 = AFRICA + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/region.parquet/part1.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = AFRICA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= AFRICA AND AFRICA <= r_name_max@1 END, required_guarantees=[r_name in (AFRICA)] -Query Stage #2 (4 -> 4): -ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "o_orderkey", index: 1 }], 4)) - ProjectionExec: expr=[c_nationkey@1 as c_nationkey, o_orderkey@2 as o_orderkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "c_custkey", index: 0 }, Column { name: 
"o_custkey", index: 1 })] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "o_custkey", index: 1 }], 4)) +Query Stage #1 (1 -> 4): +ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name, n_regionkey] + +Query Stage #2 (2 -> 4): +ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }, Column { name: "s_nationkey", index: 1 }], 4)) + ParquetExec: file_groups={2 groups: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet/part1.parquet], [home/edmondo/Development/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] Query Stage #3 (4 -> 4): -ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-16.parquet], 
[mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-3.parquet]]}, projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount] +ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:0..3391370], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:3391370..6782740], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:6782740..6784675, home/edmondo/Development/datafusion-ray/data/customer.parquet/part2.parquet:0..3389435], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part2.parquet:3389435..6780803]]}, projection=[c_custkey, c_nationkey] Query Stage #4 (4 -> 4): -ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "l_suppkey", index: 1 }, Column { name: "c_nationkey", index: 0 }], 4)) - ProjectionExec: expr=[c_nationkey@0 as c_nationkey, l_suppkey@3 as l_suppkey, l_extendedprice@4 as l_extendedprice, l_discount@5 as l_discount] +ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "o_custkey", index: 1 }], 4)) + ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] CoalesceBatchesExec: 
target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "o_orderkey", index: 1 }, Column { name: "l_orderkey", index: 0 })] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "o_orderkey", index: 1 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) + FilterExec: o_orderdate@2 >= 1994-01-01 AND o_orderdate@2 < 1995-01-01 + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1994-01-01 AND o_orderdate@4 < 1995-01-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1994-01-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1995-01-01 END, required_guarantees=[] Query Stage #5 (4 -> 4): -ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }, Column { name: "s_nationkey", index: 1 }], 4)) - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-6.parquet, 
mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-3.parquet]]}, projection=[s_suppkey, s_nationkey] +ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "o_orderkey", index: 1 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_nationkey@1, o_orderkey@2] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=4, input_partitioning=Hash([Column { name: "o_custkey", index: 1 }], 4)) Query Stage #6 (4 -> 4): -ShuffleWriterExec(stage_id=6, output_partitioning=Hash([Column { name: "s_nationkey", index: 2 }], 4)) - ProjectionExec: expr=[l_extendedprice@2 as 
l_extendedprice, l_discount@3 as l_discount, s_nationkey@5 as s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "l_suppkey", index: 1 }, Column { name: "s_suppkey", index: 0 }), (Column { name: "c_nationkey", index: 0 }, Column { name: "s_nationkey", index: 1 })] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=4, input_partitioning=Hash([Column { name: "l_suppkey", index: 1 }, Column { name: "c_nationkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=5, input_partitioning=Hash([Column { name: "s_suppkey", index: 0 }, Column { name: "s_nationkey", index: 1 }], 4)) +ShuffleWriterExec(stage_id=6, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount] -Query Stage #7 (1 -> 4): -ShuffleWriterExec(stage_id=7, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) - ParquetExec: limit=None, partitions={1 group: [[mnt/bigdata/tpch/sf10-parquet/nation.parquet/part-0.parquet]]}, projection=[n_nationkey, n_name, n_regionkey] +Query Stage #7 (4 -> 4): +ShuffleWriterExec(stage_id=7, output_partitioning=Hash([Column { name: "l_suppkey", index: 1 }, Column { name: "c_nationkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@1, l_orderkey@0)], 
projection=[c_nationkey@0, l_suppkey@3, l_extendedprice@4, l_discount@5] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=5, input_partitioning=Hash([Column { name: "o_orderkey", index: 1 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=6, input_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) Query Stage #8 (4 -> 4): -ShuffleWriterExec(stage_id=8, output_partitioning=Hash([Column { name: "n_regionkey", index: 3 }], 4)) - ProjectionExec: expr=[l_extendedprice@0 as l_extendedprice, l_discount@1 as l_discount, n_name@4 as n_name, n_regionkey@5 as n_regionkey] +ShuffleWriterExec(stage_id=8, output_partitioning=Hash([Column { name: "s_nationkey", index: 2 }], 4)) + ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@0 as s_nationkey] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "s_nationkey", index: 2 }, Column { name: "n_nationkey", index: 0 })] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1), (s_nationkey@1, c_nationkey@0)], projection=[s_nationkey@1, l_extendedprice@4, l_discount@5] CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=6, input_partitioning=Hash([Column { name: "s_nationkey", index: 2 }], 4)) + ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "s_suppkey", index: 0 }, Column { name: "s_nationkey", index: 1 }], 4)) CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=7, input_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) + ShuffleReaderExec(stage_id=7, input_partitioning=Hash([Column { name: "l_suppkey", index: 1 }, Column { name: "c_nationkey", index: 0 }], 4)) -Query Stage #9 (1 -> 4): -ShuffleWriterExec(stage_id=9, output_partitioning=Hash([Column { name: "r_regionkey", index: 0 }], 4)) - ProjectionExec: expr=[r_regionkey@0 as r_regionkey] +Query 
Stage #9 (4 -> 4): +ShuffleWriterExec(stage_id=9, output_partitioning=Hash([Column { name: "n_regionkey", index: 3 }], 4)) + ProjectionExec: expr=[l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, n_name@0 as n_name, n_regionkey@1 as n_regionkey] CoalesceBatchesExec: target_batch_size=8192 - FilterExec: r_name@1 = AFRICA - ParquetExec: limit=None, partitions={1 group: [[mnt/bigdata/tpch/sf10-parquet/region.parquet/part-0.parquet]]}, predicate=r_name@1 = AFRICA, pruning_predicate=r_name_min@0 <= AFRICA AND AFRICA <= r_name_max@1, projection=[r_regionkey, r_name] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[n_name@1, n_regionkey@2, l_extendedprice@3, l_discount@4] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=8, input_partitioning=Hash([Column { name: "s_nationkey", index: 2 }], 4)) Query Stage #10 (4 -> 4): ShuffleWriterExec(stage_id=10, output_partitioning=Hash([Column { name: "n_name", index: 0 }], 4)) - AggregateExec: mode=Partial, gby=[n_name@2 as n_name], aggr=[SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] - ProjectionExec: expr=[l_extendedprice@0 as l_extendedprice, l_discount@1 as l_discount, n_name@2 as n_name] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "n_regionkey", index: 3 }, Column { name: "r_regionkey", index: 0 })] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=8, input_partitioning=Hash([Column { name: "n_regionkey", index: 3 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=9, input_partitioning=Hash([Column { name: "r_regionkey", index: 0 }], 4)) + AggregateExec: mode=Partial, gby=[n_name@2 as n_name], aggr=[sum(lineitem.l_extendedprice * Int64(1) - 
lineitem.l_discount)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(r_regionkey@0, n_regionkey@3)], projection=[l_extendedprice@1, l_discount@2, n_name@3] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "r_regionkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=9, input_partitioning=Hash([Column { name: "n_regionkey", index: 3 }], 4)) Query Stage #11 (4 -> 4): ShuffleWriterExec(stage_id=11, output_partitioning=Hash([Column { name: "n_name", index: 0 }], 4)) - SortExec: expr=[revenue@1 DESC] - ProjectionExec: expr=[n_name@0 as n_name, SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@1 as revenue] - AggregateExec: mode=FinalPartitioned, gby=[n_name@0 as n_name], aggr=[SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] + SortExec: expr=[revenue@1 DESC], preserve_partitioning=[true] + ProjectionExec: expr=[n_name@0 as n_name, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@1 as revenue] + AggregateExec: mode=FinalPartitioned, gby=[n_name@0 as n_name], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] CoalesceBatchesExec: target_batch_size=8192 ShuffleReaderExec(stage_id=10, input_partitioning=Hash([Column { name: "n_name", index: 0 }], 4)) diff --git a/testdata/expected-plans/q6.txt b/testdata/expected-plans/q6.txt index 0f41810..0cd41ee 100644 --- a/testdata/expected-plans/q6.txt +++ b/testdata/expected-plans/q6.txt @@ -1,38 +1,38 @@ DataFusion Logical Plan ======================= -Projection: SUM(lineitem.l_extendedprice * lineitem.l_discount) AS revenue - Aggregate: groupBy=[[]], aggr=[[SUM(lineitem.l_extendedprice * lineitem.l_discount)]] +Projection: sum(lineitem.l_extendedprice * lineitem.l_discount) AS revenue + Aggregate: groupBy=[[]], aggr=[[sum(lineitem.l_extendedprice * lineitem.l_discount)]] Projection: 
lineitem.l_extendedprice, lineitem.l_discount - Filter: lineitem.l_shipdate >= Date32("8766") AND lineitem.l_shipdate < Date32("9131") AND lineitem.l_discount >= Decimal128(Some(3),11,2) AND lineitem.l_discount <= Decimal128(Some(5),11,2) AND lineitem.l_quantity < Decimal128(Some(2400),11,2) - TableScan: lineitem projection=[l_quantity, l_extendedprice, l_discount, l_shipdate], partial_filters=[lineitem.l_shipdate >= Date32("8766"), lineitem.l_shipdate < Date32("9131"), lineitem.l_discount >= Decimal128(Some(3),11,2), lineitem.l_discount <= Decimal128(Some(5),11,2), lineitem.l_quantity < Decimal128(Some(2400),11,2)] + Filter: lineitem.l_shipdate >= Date32("1994-01-01") AND lineitem.l_shipdate < Date32("1995-01-01") AND lineitem.l_discount >= Decimal128(Some(3),11,2) AND lineitem.l_discount <= Decimal128(Some(5),11,2) AND lineitem.l_quantity < Decimal128(Some(2400),11,2) + TableScan: lineitem projection=[l_quantity, l_extendedprice, l_discount, l_shipdate], partial_filters=[lineitem.l_shipdate >= Date32("1994-01-01"), lineitem.l_shipdate < Date32("1995-01-01"), lineitem.l_discount >= Decimal128(Some(3),11,2), lineitem.l_discount <= Decimal128(Some(5),11,2), lineitem.l_quantity < Decimal128(Some(2400),11,2)] DataFusion Physical Plan ======================== -ProjectionExec: expr=[SUM(lineitem.l_extendedprice * lineitem.l_discount)@0 as revenue] - AggregateExec: mode=Final, gby=[], aggr=[SUM(lineitem.l_extendedprice * lineitem.l_discount)] +ProjectionExec: expr=[sum(lineitem.l_extendedprice * lineitem.l_discount)@0 as revenue] + AggregateExec: mode=Final, gby=[], aggr=[sum(lineitem.l_extendedprice * lineitem.l_discount)] CoalescePartitionsExec - AggregateExec: mode=Partial, gby=[], aggr=[SUM(lineitem.l_extendedprice * lineitem.l_discount)] + AggregateExec: mode=Partial, gby=[], aggr=[sum(lineitem.l_extendedprice * lineitem.l_discount)] ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] CoalesceBatchesExec: target_batch_size=8192 - 
FilterExec: l_shipdate@3 >= 8766 AND l_shipdate@3 < 9131 AND l_discount@2 >= Some(3),11,2 AND l_discount@2 <= Some(5),11,2 AND l_quantity@0 < Some(2400),11,2 - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-3.parquet]]}, predicate=l_shipdate@10 >= 8766 AND l_shipdate@10 < 9131 AND l_discount@6 >= Some(3),11,2 AND l_discount@6 <= Some(5),11,2 AND l_quantity@4 < Some(2400),11,2, pruning_predicate=l_shipdate_max@0 >= 8766 AND l_shipdate_min@1 < 9131 AND l_discount_max@2 >= 
Some(3),11,2 AND l_discount_min@3 <= Some(5),11,2 AND l_quantity_min@4 < Some(2400),11,2, projection=[l_quantity, l_extendedprice, l_discount, l_shipdate] + FilterExec: l_shipdate@3 >= 1994-01-01 AND l_shipdate@3 < 1995-01-01 AND l_discount@2 >= Some(3),11,2 AND l_discount@2 <= Some(5),11,2 AND l_quantity@0 < Some(2400),11,2 + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_quantity, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1994-01-01 AND l_shipdate@10 < 1995-01-01 AND l_discount@6 >= Some(3),11,2 AND l_discount@6 <= Some(5),11,2 AND l_quantity@4 < Some(2400),11,2, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1994-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1995-01-01 END AND CASE WHEN l_discount_null_count@5 = l_discount_row_count@6 THEN false ELSE l_discount_max@4 >= Some(3),11,2 END AND CASE WHEN l_discount_null_count@5 = l_discount_row_count@6 THEN false ELSE l_discount_min@7 <= Some(5),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@8 < Some(2400),11,2 END, required_guarantees=[] RaySQL Plan =========== Query Stage #0 (4 -> 1): ShuffleWriterExec(stage_id=0, output_partitioning=UnknownPartitioning(4)) - AggregateExec: mode=Partial, gby=[], aggr=[SUM(lineitem.l_extendedprice * lineitem.l_discount)] + AggregateExec: mode=Partial, gby=[], aggr=[sum(lineitem.l_extendedprice * 
lineitem.l_discount)] ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_shipdate@3 >= 8766 AND l_shipdate@3 < 9131 AND l_discount@2 >= Some(3),11,2 AND l_discount@2 <= Some(5),11,2 AND l_quantity@0 < Some(2400),11,2 - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-3.parquet]]}, predicate=l_shipdate@10 >= 8766 AND l_shipdate@10 < 9131 AND l_discount@6 >= Some(3),11,2 AND 
l_discount@6 <= Some(5),11,2 AND l_quantity@4 < Some(2400),11,2, pruning_predicate=l_shipdate_max@0 >= 8766 AND l_shipdate_min@1 < 9131 AND l_discount_max@2 >= Some(3),11,2 AND l_discount_min@3 <= Some(5),11,2 AND l_quantity_min@4 < Some(2400),11,2, projection=[l_quantity, l_extendedprice, l_discount, l_shipdate] + FilterExec: l_shipdate@3 >= 1994-01-01 AND l_shipdate@3 < 1995-01-01 AND l_discount@2 >= Some(3),11,2 AND l_discount@2 <= Some(5),11,2 AND l_quantity@0 < Some(2400),11,2 + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_quantity, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1994-01-01 AND l_shipdate@10 < 1995-01-01 AND l_discount@6 >= Some(3),11,2 AND l_discount@6 <= Some(5),11,2 AND l_quantity@4 < Some(2400),11,2, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1994-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1995-01-01 END AND CASE WHEN l_discount_null_count@5 = l_discount_row_count@6 THEN false ELSE l_discount_max@4 >= Some(3),11,2 END AND CASE WHEN l_discount_null_count@5 = l_discount_row_count@6 THEN false ELSE l_discount_min@7 <= Some(5),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@8 < Some(2400),11,2 END, required_guarantees=[] Query Stage #1 (1 -> 1): -ProjectionExec: expr=[SUM(lineitem.l_extendedprice * lineitem.l_discount)@0 as revenue] - AggregateExec: mode=Final, 
gby=[], aggr=[SUM(lineitem.l_extendedprice * lineitem.l_discount)] +ProjectionExec: expr=[sum(lineitem.l_extendedprice * lineitem.l_discount)@0 as revenue] + AggregateExec: mode=Final, gby=[], aggr=[sum(lineitem.l_extendedprice * lineitem.l_discount)] CoalescePartitionsExec ShuffleReaderExec(stage_id=0, input_partitioning=UnknownPartitioning(4)) diff --git a/testdata/expected-plans/q7.txt b/testdata/expected-plans/q7.txt index 5b778e4..68c77e4 100644 --- a/testdata/expected-plans/q7.txt +++ b/testdata/expected-plans/q7.txt @@ -2,188 +2,177 @@ DataFusion Logical Plan ======================= Sort: shipping.supp_nation ASC NULLS LAST, shipping.cust_nation ASC NULLS LAST, shipping.l_year ASC NULLS LAST - Projection: shipping.supp_nation, shipping.cust_nation, shipping.l_year, SUM(shipping.volume) AS revenue - Aggregate: groupBy=[[shipping.supp_nation, shipping.cust_nation, shipping.l_year]], aggr=[[SUM(shipping.volume)]] + Projection: shipping.supp_nation, shipping.cust_nation, shipping.l_year, sum(shipping.volume) AS revenue + Aggregate: groupBy=[[shipping.supp_nation, shipping.cust_nation, shipping.l_year]], aggr=[[sum(shipping.volume)]] SubqueryAlias: shipping - Projection: n1.n_name AS supp_nation, n2.n_name AS cust_nation, datepart(Utf8("YEAR"), lineitem.l_shipdate) AS l_year, CAST(lineitem.l_extendedprice AS Decimal128(35, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(35, 4)) AS volume - Filter: n1.n_name = Utf8("GERMANY") AND n2.n_name = Utf8("IRAQ") OR n1.n_name = Utf8("IRAQ") AND n2.n_name = Utf8("GERMANY") - Projection: lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_shipdate, n1.n_name, n2.n_name - Inner Join: customer.c_nationkey = n2.n_nationkey - Projection: lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_shipdate, customer.c_nationkey, n1.n_name - Inner Join: supplier.s_nationkey = n1.n_nationkey - Projection: supplier.s_nationkey, lineitem.l_extendedprice, lineitem.l_discount, 
lineitem.l_shipdate, customer.c_nationkey - Inner Join: orders.o_custkey = customer.c_custkey - Projection: supplier.s_nationkey, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_shipdate, orders.o_custkey - Inner Join: lineitem.l_orderkey = orders.o_orderkey - Projection: supplier.s_nationkey, lineitem.l_orderkey, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_shipdate - Inner Join: supplier.s_suppkey = lineitem.l_suppkey - TableScan: supplier projection=[s_suppkey, s_nationkey] - Filter: lineitem.l_shipdate >= Date32("9131") AND lineitem.l_shipdate <= Date32("9861") - TableScan: lineitem projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount, l_shipdate], partial_filters=[lineitem.l_shipdate >= Date32("9131"), lineitem.l_shipdate <= Date32("9861")] - TableScan: orders projection=[o_orderkey, o_custkey] - TableScan: customer projection=[c_custkey, c_nationkey] - SubqueryAlias: n1 - Filter: nation.n_name = Utf8("GERMANY") OR nation.n_name = Utf8("IRAQ") - TableScan: nation projection=[n_nationkey, n_name], partial_filters=[nation.n_name = Utf8("GERMANY") OR nation.n_name = Utf8("IRAQ")] - SubqueryAlias: n2 - Filter: nation.n_name = Utf8("IRAQ") OR nation.n_name = Utf8("GERMANY") - TableScan: nation projection=[n_nationkey, n_name], partial_filters=[nation.n_name = Utf8("IRAQ") OR nation.n_name = Utf8("GERMANY")] + Projection: n1.n_name AS supp_nation, n2.n_name AS cust_nation, date_part(Utf8("YEAR"), lineitem.l_shipdate) AS l_year, lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - lineitem.l_discount) AS volume + Inner Join: customer.c_nationkey = n2.n_nationkey Filter: n1.n_name = Utf8("GERMANY") AND n2.n_name = Utf8("IRAQ") OR n1.n_name = Utf8("IRAQ") AND n2.n_name = Utf8("GERMANY") + Projection: lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_shipdate, customer.c_nationkey, n1.n_name + Inner Join: supplier.s_nationkey = n1.n_nationkey + Projection: supplier.s_nationkey, lineitem.l_extendedprice, lineitem.l_discount, 
lineitem.l_shipdate, customer.c_nationkey + Inner Join: orders.o_custkey = customer.c_custkey + Projection: supplier.s_nationkey, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_shipdate, orders.o_custkey + Inner Join: lineitem.l_orderkey = orders.o_orderkey + Projection: supplier.s_nationkey, lineitem.l_orderkey, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_shipdate + Inner Join: supplier.s_suppkey = lineitem.l_suppkey + TableScan: supplier projection=[s_suppkey, s_nationkey] + Filter: lineitem.l_shipdate >= Date32("1995-01-01") AND lineitem.l_shipdate <= Date32("1996-12-31") + TableScan: lineitem projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount, l_shipdate], partial_filters=[lineitem.l_shipdate >= Date32("1995-01-01"), lineitem.l_shipdate <= Date32("1996-12-31")] + TableScan: orders projection=[o_orderkey, o_custkey] + TableScan: customer projection=[c_custkey, c_nationkey] + SubqueryAlias: n1 + Filter: nation.n_name = Utf8("GERMANY") OR nation.n_name = Utf8("IRAQ") + TableScan: nation projection=[n_nationkey, n_name], partial_filters=[nation.n_name = Utf8("GERMANY") OR nation.n_name = Utf8("IRAQ")] + SubqueryAlias: n2 + Filter: nation.n_name = Utf8("IRAQ") OR nation.n_name = Utf8("GERMANY") + TableScan: nation projection=[n_nationkey, n_name], partial_filters=[nation.n_name = Utf8("IRAQ") OR nation.n_name = Utf8("GERMANY")] DataFusion Physical Plan ======================== SortPreservingMergeExec: [supp_nation@0 ASC NULLS LAST,cust_nation@1 ASC NULLS LAST,l_year@2 ASC NULLS LAST] - SortExec: expr=[supp_nation@0 ASC NULLS LAST,cust_nation@1 ASC NULLS LAST,l_year@2 ASC NULLS LAST] - ProjectionExec: expr=[supp_nation@0 as supp_nation, cust_nation@1 as cust_nation, l_year@2 as l_year, SUM(shipping.volume)@3 as revenue] - AggregateExec: mode=FinalPartitioned, gby=[supp_nation@0 as supp_nation, cust_nation@1 as cust_nation, l_year@2 as l_year], aggr=[SUM(shipping.volume)] + SortExec: expr=[supp_nation@0 ASC NULLS LAST,cust_nation@1 
ASC NULLS LAST,l_year@2 ASC NULLS LAST], preserve_partitioning=[true] + ProjectionExec: expr=[supp_nation@0 as supp_nation, cust_nation@1 as cust_nation, l_year@2 as l_year, sum(shipping.volume)@3 as revenue] + AggregateExec: mode=FinalPartitioned, gby=[supp_nation@0 as supp_nation, cust_nation@1 as cust_nation, l_year@2 as l_year], aggr=[sum(shipping.volume)] CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "supp_nation", index: 0 }, Column { name: "cust_nation", index: 1 }, Column { name: "l_year", index: 2 }], 4), input_partitions=4 - AggregateExec: mode=Partial, gby=[supp_nation@0 as supp_nation, cust_nation@1 as cust_nation, l_year@2 as l_year], aggr=[SUM(shipping.volume)] - ProjectionExec: expr=[n_name@3 as supp_nation, n_name@4 as cust_nation, datepart(YEAR, l_shipdate@2) as l_year, CAST(l_extendedprice@0 AS Decimal128(35, 4)) * CAST(Some(100),23,2 - CAST(l_discount@1 AS Decimal128(23, 2)) AS Decimal128(35, 4)) as volume] + RepartitionExec: partitioning=Hash([supp_nation@0, cust_nation@1, l_year@2], 4), input_partitions=4 + AggregateExec: mode=Partial, gby=[supp_nation@0 as supp_nation, cust_nation@1 as cust_nation, l_year@2 as l_year], aggr=[sum(shipping.volume)] + ProjectionExec: expr=[n_name@4 as supp_nation, n_name@0 as cust_nation, date_part(YEAR, l_shipdate@3) as l_year, l_extendedprice@1 * (Some(1),20,0 - l_discount@2) as volume] CoalesceBatchesExec: target_batch_size=8192 - FilterExec: n_name@3 = GERMANY AND n_name@4 = IRAQ OR n_name@3 = IRAQ AND n_name@4 = GERMANY - ProjectionExec: expr=[l_extendedprice@0 as l_extendedprice, l_discount@1 as l_discount, l_shipdate@2 as l_shipdate, n_name@4 as n_name, n_name@6 as n_name] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "c_nationkey", index: 3 }, Column { name: "n_nationkey", index: 0 })] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, c_nationkey@3)], 
filter=n_name@0 = GERMANY AND n_name@1 = IRAQ OR n_name@0 = IRAQ AND n_name@1 = GERMANY, projection=[n_name@1, l_extendedprice@2, l_discount@3, l_shipdate@4, n_name@6] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=4 + RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "c_nationkey", index: 3 }], 4), input_partitions=4 - ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, l_shipdate@3 as l_shipdate, c_nationkey@4 as c_nationkey, n_name@6 as n_name] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "s_nationkey", index: 0 }, Column { name: "n_nationkey", index: 0 })] + FilterExec: n_name@1 = IRAQ OR n_name@1 = GERMANY + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = IRAQ OR n_name@1 = GERMANY, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= IRAQ AND IRAQ <= n_name_max@1 END OR CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= GERMANY AND GERMANY <= n_name_max@1 END, required_guarantees=[n_name in (GERMANY, IRAQ)] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([c_nationkey@3], 4), input_partitions=4 + ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, l_shipdate@3 as l_shipdate, c_nationkey@4 as c_nationkey, n_name@0 as n_name] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@0)], projection=[n_name@1, l_extendedprice@3, l_discount@4, l_shipdate@5, c_nationkey@6] + CoalesceBatchesExec: target_batch_size=8192 + 
RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=4 + RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: n_name@1 = GERMANY OR n_name@1 = IRAQ + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = GERMANY OR n_name@1 = IRAQ, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= GERMANY AND GERMANY <= n_name_max@1 END OR CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= IRAQ AND IRAQ <= n_name_max@1 END, required_guarantees=[n_name in (IRAQ, GERMANY)] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([s_nationkey@0], 4), input_partitions=4 + ProjectionExec: expr=[s_nationkey@1 as s_nationkey, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, l_shipdate@4 as l_shipdate, c_nationkey@0 as c_nationkey] CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "s_nationkey", index: 0 }], 4), input_partitions=4 - ProjectionExec: expr=[s_nationkey@0 as s_nationkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, l_shipdate@3 as l_shipdate, c_nationkey@6 as c_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "o_custkey", index: 4 }, Column { name: "c_custkey", index: 0 })] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "o_custkey", index: 4 }], 4), input_partitions=4 - ProjectionExec: expr=[s_nationkey@0 as s_nationkey, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, l_shipdate@4 as l_shipdate, o_custkey@6 as o_custkey] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@4)], projection=[c_nationkey@1, 
s_nationkey@2, l_extendedprice@3, l_discount@4, l_shipdate@5] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([c_custkey@0], 4), input_partitions=4 + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:0..3391370], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:3391370..6782740], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:6782740..6784675, home/edmondo/Development/datafusion-ray/data/customer.parquet/part2.parquet:0..3389435], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part2.parquet:3389435..6780803]]}, projection=[c_custkey, c_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([o_custkey@4], 4), input_partitions=4 + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(l_orderkey@1, o_orderkey@0)], projection=[s_nationkey@0, l_extendedprice@2, l_discount@3, l_shipdate@4, o_custkey@6] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([l_orderkey@1], 4), input_partitions=4 CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "l_orderkey", index: 1 }, Column { name: "o_orderkey", index: 0 })] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1)], projection=[s_nationkey@1, l_orderkey@2, l_extendedprice@4, l_discount@5, l_shipdate@6] CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "l_orderkey", index: 1 }], 4), input_partitions=4 - ProjectionExec: expr=[s_nationkey@1 as s_nationkey, l_orderkey@2 as l_orderkey, l_extendedprice@4 as l_extendedprice, l_discount@5 as l_discount, l_shipdate@6 as l_shipdate] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "s_suppkey", 
index: 0 }, Column { name: "l_suppkey", index: 1 })] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4), input_partitions=4 - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-3.parquet]]}, projection=[s_suppkey, s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "l_suppkey", index: 1 }], 4), input_partitions=4 - CoalesceBatchesExec: 
target_batch_size=8192 - FilterExec: l_shipdate@4 >= 9131 AND l_shipdate@4 <= 9861 - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-3.parquet]]}, predicate=l_shipdate@10 >= 9131 AND l_shipdate@10 <= 9861, pruning_predicate=l_shipdate_max@0 >= 9131 AND l_shipdate_min@1 <= 9861, projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount, l_shipdate] + RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=4 + RepartitionExec: 
partitioning=RoundRobinBatch(4), input_partitions=2 + ParquetExec: file_groups={2 groups: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet/part1.parquet], [home/edmondo/Development/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4), input_partitions=4 - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-3.parquet]]}, projection=[o_orderkey, 
o_custkey] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4), input_partitions=4 - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-3.parquet]]}, projection=[c_custkey, c_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4), input_partitions=4 - RepartitionExec: partitioning=RoundRobinBatch(4), 
input_partitions=1 - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: n_name@1 = GERMANY OR n_name@1 = IRAQ - ParquetExec: limit=None, partitions={1 group: [[mnt/bigdata/tpch/sf10-parquet/nation.parquet/part-0.parquet]]}, predicate=n_name@1 = GERMANY OR n_name@1 = IRAQ, pruning_predicate=n_name_min@0 <= GERMANY AND GERMANY <= n_name_max@1 OR n_name_min@0 <= IRAQ AND IRAQ <= n_name_max@1, projection=[n_nationkey, n_name] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4), input_partitions=4 - RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: n_name@1 = IRAQ OR n_name@1 = GERMANY - ParquetExec: limit=None, partitions={1 group: [[mnt/bigdata/tpch/sf10-parquet/nation.parquet/part-0.parquet]]}, predicate=n_name@1 = IRAQ OR n_name@1 = GERMANY, pruning_predicate=n_name_min@0 <= IRAQ AND IRAQ <= n_name_max@1 OR n_name_min@0 <= GERMANY AND GERMANY <= n_name_max@1, projection=[n_nationkey, n_name] + RepartitionExec: partitioning=Hash([l_suppkey@1], 4), input_partitions=4 + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: l_shipdate@4 >= 1995-01-01 AND l_shipdate@4 <= 1996-12-31 + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1995-01-01 AND l_shipdate@10 <= 1996-12-31, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 
THEN false ELSE l_shipdate_max@0 >= 1995-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 <= 1996-12-31 END, required_guarantees=[] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([o_orderkey@0], 4), input_partitions=4 + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_custkey] RaySQL Plan =========== -Query Stage #0 (4 -> 4): -ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-23.parquet, 
mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-3.parquet]]}, projection=[s_suppkey, s_nationkey] +Query Stage #0 (1 -> 4): +ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: n_name@1 = IRAQ OR n_name@1 = GERMANY + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = IRAQ OR n_name@1 = GERMANY, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= IRAQ AND IRAQ <= n_name_max@1 END OR CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= GERMANY AND GERMANY <= n_name_max@1 END, required_guarantees=[n_name in (GERMANY, IRAQ)] -Query Stage #1 (4 -> 4): -ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "l_suppkey", index: 1 }], 4)) +Query Stage #1 (1 -> 4): +ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_shipdate@4 >= 9131 AND l_shipdate@4 <= 9861 - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-10.parquet, 
mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-3.parquet]]}, predicate=l_shipdate@10 >= 9131 AND l_shipdate@10 <= 9861, pruning_predicate=l_shipdate_max@0 >= 9131 AND l_shipdate_min@1 <= 9861, projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount, l_shipdate] + FilterExec: n_name@1 = GERMANY OR n_name@1 = IRAQ + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = GERMANY OR n_name@1 = IRAQ, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= GERMANY AND GERMANY <= 
n_name_max@1 END OR CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= IRAQ AND IRAQ <= n_name_max@1 END, required_guarantees=[n_name in (IRAQ, GERMANY)] Query Stage #2 (4 -> 4): -ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "l_orderkey", index: 1 }], 4)) - ProjectionExec: expr=[s_nationkey@1 as s_nationkey, l_orderkey@2 as l_orderkey, l_extendedprice@4 as l_extendedprice, l_discount@5 as l_discount, l_shipdate@6 as l_shipdate] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "s_suppkey", index: 0 }, Column { name: "l_suppkey", index: 1 })] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "l_suppkey", index: 1 }], 4)) +ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:0..3391370], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:3391370..6782740], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:6782740..6784675, home/edmondo/Development/datafusion-ray/data/customer.parquet/part2.parquet:0..3389435], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part2.parquet:3389435..6780803]]}, projection=[c_custkey, c_nationkey] -Query Stage #3 (4 -> 4): -ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-11.parquet, 
mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-3.parquet]]}, projection=[o_orderkey, o_custkey] +Query Stage #3 (2 -> 4): +ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) + ParquetExec: file_groups={2 groups: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet/part1.parquet], [home/edmondo/Development/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] Query Stage #4 (4 -> 4): -ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "o_custkey", index: 4 }], 4)) - ProjectionExec: expr=[s_nationkey@0 as s_nationkey, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, l_shipdate@4 as l_shipdate, o_custkey@6 as 
o_custkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "l_orderkey", index: 1 }, Column { name: "o_orderkey", index: 0 })] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "l_orderkey", index: 1 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) +ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "l_suppkey", index: 1 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: l_shipdate@4 >= 1995-01-01 AND l_shipdate@4 <= 1996-12-31 + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1995-01-01 AND l_shipdate@10 <= 1996-12-31, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1995-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 <= 1996-12-31 END, required_guarantees=[] Query Stage #5 (4 -> 4): -ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-10.parquet, 
mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-3.parquet]]}, projection=[c_custkey, c_nationkey] +ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "l_orderkey", index: 1 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1)], projection=[s_nationkey@1, l_orderkey@2, l_extendedprice@4, l_discount@5, l_shipdate@6] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=4, 
input_partitioning=Hash([Column { name: "l_suppkey", index: 1 }], 4)) Query Stage #6 (4 -> 4): -ShuffleWriterExec(stage_id=6, output_partitioning=Hash([Column { name: "s_nationkey", index: 0 }], 4)) - ProjectionExec: expr=[s_nationkey@0 as s_nationkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, l_shipdate@3 as l_shipdate, c_nationkey@6 as c_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "o_custkey", index: 4 }, Column { name: "c_custkey", index: 0 })] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=4, input_partitioning=Hash([Column { name: "o_custkey", index: 4 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=5, input_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) +ShuffleWriterExec(stage_id=6, output_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_custkey] -Query Stage #7 (1 -> 4): -ShuffleWriterExec(stage_id=7, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) +Query Stage #7 (4 -> 4): +ShuffleWriterExec(stage_id=7, output_partitioning=Hash([Column { name: "o_custkey", index: 4 }], 4)) CoalesceBatchesExec: target_batch_size=8192 - FilterExec: n_name@1 = GERMANY OR n_name@1 = IRAQ - ParquetExec: limit=None, partitions={1 group: [[mnt/bigdata/tpch/sf10-parquet/nation.parquet/part-0.parquet]]}, predicate=n_name@1 = GERMANY OR 
n_name@1 = IRAQ, pruning_predicate=n_name_min@0 <= GERMANY AND GERMANY <= n_name_max@1 OR n_name_min@0 <= IRAQ AND IRAQ <= n_name_max@1, projection=[n_nationkey, n_name] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(l_orderkey@1, o_orderkey@0)], projection=[s_nationkey@0, l_extendedprice@2, l_discount@3, l_shipdate@4, o_custkey@6] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=5, input_partitioning=Hash([Column { name: "l_orderkey", index: 1 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=6, input_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) Query Stage #8 (4 -> 4): -ShuffleWriterExec(stage_id=8, output_partitioning=Hash([Column { name: "c_nationkey", index: 3 }], 4)) - ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, l_shipdate@3 as l_shipdate, c_nationkey@4 as c_nationkey, n_name@6 as n_name] +ShuffleWriterExec(stage_id=8, output_partitioning=Hash([Column { name: "s_nationkey", index: 0 }], 4)) + ProjectionExec: expr=[s_nationkey@1 as s_nationkey, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, l_shipdate@4 as l_shipdate, c_nationkey@0 as c_nationkey] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "s_nationkey", index: 0 }, Column { name: "n_nationkey", index: 0 })] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@4)], projection=[c_nationkey@1, s_nationkey@2, l_extendedprice@3, l_discount@4, l_shipdate@5] CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=6, input_partitioning=Hash([Column { name: "s_nationkey", index: 0 }], 4)) + ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=7, input_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) + 
ShuffleReaderExec(stage_id=7, input_partitioning=Hash([Column { name: "o_custkey", index: 4 }], 4)) -Query Stage #9 (1 -> 4): -ShuffleWriterExec(stage_id=9, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: n_name@1 = IRAQ OR n_name@1 = GERMANY - ParquetExec: limit=None, partitions={1 group: [[mnt/bigdata/tpch/sf10-parquet/nation.parquet/part-0.parquet]]}, predicate=n_name@1 = IRAQ OR n_name@1 = GERMANY, pruning_predicate=n_name_min@0 <= IRAQ AND IRAQ <= n_name_max@1 OR n_name_min@0 <= GERMANY AND GERMANY <= n_name_max@1, projection=[n_nationkey, n_name] +Query Stage #9 (4 -> 4): +ShuffleWriterExec(stage_id=9, output_partitioning=Hash([Column { name: "c_nationkey", index: 3 }], 4)) + ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, l_shipdate@3 as l_shipdate, c_nationkey@4 as c_nationkey, n_name@0 as n_name] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@0)], projection=[n_name@1, l_extendedprice@3, l_discount@4, l_shipdate@5, c_nationkey@6] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=8, input_partitioning=Hash([Column { name: "s_nationkey", index: 0 }], 4)) Query Stage #10 (4 -> 4): ShuffleWriterExec(stage_id=10, output_partitioning=Hash([Column { name: "supp_nation", index: 0 }, Column { name: "cust_nation", index: 1 }, Column { name: "l_year", index: 2 }], 4)) - AggregateExec: mode=Partial, gby=[supp_nation@0 as supp_nation, cust_nation@1 as cust_nation, l_year@2 as l_year], aggr=[SUM(shipping.volume)] - ProjectionExec: expr=[n_name@3 as supp_nation, n_name@4 as cust_nation, datepart(YEAR, l_shipdate@2) as l_year, CAST(l_extendedprice@0 AS Decimal128(35, 4)) * CAST(Some(100),23,2 - 
CAST(l_discount@1 AS Decimal128(23, 2)) AS Decimal128(35, 4)) as volume] + AggregateExec: mode=Partial, gby=[supp_nation@0 as supp_nation, cust_nation@1 as cust_nation, l_year@2 as l_year], aggr=[sum(shipping.volume)] + ProjectionExec: expr=[n_name@4 as supp_nation, n_name@0 as cust_nation, date_part(YEAR, l_shipdate@3) as l_year, l_extendedprice@1 * (Some(1),20,0 - l_discount@2) as volume] CoalesceBatchesExec: target_batch_size=8192 - FilterExec: n_name@3 = GERMANY AND n_name@4 = IRAQ OR n_name@3 = IRAQ AND n_name@4 = GERMANY - ProjectionExec: expr=[l_extendedprice@0 as l_extendedprice, l_discount@1 as l_discount, l_shipdate@2 as l_shipdate, n_name@4 as n_name, n_name@6 as n_name] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "c_nationkey", index: 3 }, Column { name: "n_nationkey", index: 0 })] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=8, input_partitioning=Hash([Column { name: "c_nationkey", index: 3 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=9, input_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, c_nationkey@3)], filter=n_name@0 = GERMANY AND n_name@1 = IRAQ OR n_name@0 = IRAQ AND n_name@1 = GERMANY, projection=[n_name@1, l_extendedprice@2, l_discount@3, l_shipdate@4, n_name@6] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=9, input_partitioning=Hash([Column { name: "c_nationkey", index: 3 }], 4)) Query Stage #11 (4 -> 4): ShuffleWriterExec(stage_id=11, output_partitioning=Hash([Column { name: "supp_nation", index: 0 }, Column { name: "cust_nation", index: 1 }, Column { name: "l_year", index: 2 }], 4)) - SortExec: expr=[supp_nation@0 ASC NULLS 
LAST,cust_nation@1 ASC NULLS LAST,l_year@2 ASC NULLS LAST] - ProjectionExec: expr=[supp_nation@0 as supp_nation, cust_nation@1 as cust_nation, l_year@2 as l_year, SUM(shipping.volume)@3 as revenue] - AggregateExec: mode=FinalPartitioned, gby=[supp_nation@0 as supp_nation, cust_nation@1 as cust_nation, l_year@2 as l_year], aggr=[SUM(shipping.volume)] + SortExec: expr=[supp_nation@0 ASC NULLS LAST,cust_nation@1 ASC NULLS LAST,l_year@2 ASC NULLS LAST], preserve_partitioning=[true] + ProjectionExec: expr=[supp_nation@0 as supp_nation, cust_nation@1 as cust_nation, l_year@2 as l_year, sum(shipping.volume)@3 as revenue] + AggregateExec: mode=FinalPartitioned, gby=[supp_nation@0 as supp_nation, cust_nation@1 as cust_nation, l_year@2 as l_year], aggr=[sum(shipping.volume)] CoalesceBatchesExec: target_batch_size=8192 ShuffleReaderExec(stage_id=10, input_partitioning=Hash([Column { name: "supp_nation", index: 0 }, Column { name: "cust_nation", index: 1 }, Column { name: "l_year", index: 2 }], 4)) diff --git a/testdata/expected-plans/q8.txt b/testdata/expected-plans/q8.txt index c315805..cce7bbe 100644 --- a/testdata/expected-plans/q8.txt +++ b/testdata/expected-plans/q8.txt @@ -2,10 +2,10 @@ DataFusion Logical Plan ======================= Sort: all_nations.o_year ASC NULLS LAST - Projection: all_nations.o_year, SUM(CASE WHEN all_nations.nation = Utf8("IRAQ") THEN all_nations.volume ELSE Int64(0) END) / SUM(all_nations.volume) AS mkt_share - Aggregate: groupBy=[[all_nations.o_year]], aggr=[[SUM(CASE WHEN all_nations.nation = Utf8("IRAQ") THEN all_nations.volume ELSE Decimal128(Some(0),35,4) END) AS SUM(CASE WHEN all_nations.nation = Utf8("IRAQ") THEN all_nations.volume ELSE Int64(0) END), SUM(all_nations.volume)]] + Projection: all_nations.o_year, sum(CASE WHEN all_nations.nation = Utf8("IRAQ") THEN all_nations.volume ELSE Int64(0) END) / sum(all_nations.volume) AS mkt_share + Aggregate: groupBy=[[all_nations.o_year]], aggr=[[sum(CASE WHEN all_nations.nation = Utf8("IRAQ") 
THEN all_nations.volume ELSE Decimal128(Some(0),35,4) END) AS sum(CASE WHEN all_nations.nation = Utf8("IRAQ") THEN all_nations.volume ELSE Int64(0) END), sum(all_nations.volume)]] SubqueryAlias: all_nations - Projection: datepart(Utf8("YEAR"), orders.o_orderdate) AS o_year, CAST(lineitem.l_extendedprice AS Decimal128(35, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(35, 4)) AS volume, n2.n_name AS nation + Projection: date_part(Utf8("YEAR"), orders.o_orderdate) AS o_year, lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - lineitem.l_discount) AS volume, n2.n_name AS nation Inner Join: n1.n_regionkey = region.r_regionkey Projection: lineitem.l_extendedprice, lineitem.l_discount, orders.o_orderdate, n1.n_regionkey, n2.n_name Inner Join: supplier.s_nationkey = n2.n_nationkey @@ -24,8 +24,8 @@ Sort: all_nations.o_year ASC NULLS LAST TableScan: part projection=[p_partkey, p_type], partial_filters=[part.p_type = Utf8("LARGE PLATED STEEL")] TableScan: lineitem projection=[l_orderkey, l_partkey, l_suppkey, l_extendedprice, l_discount] TableScan: supplier projection=[s_suppkey, s_nationkey] - Filter: orders.o_orderdate >= Date32("9131") AND orders.o_orderdate <= Date32("9861") - TableScan: orders projection=[o_orderkey, o_custkey, o_orderdate], partial_filters=[orders.o_orderdate >= Date32("9131"), orders.o_orderdate <= Date32("9861")] + Filter: orders.o_orderdate >= Date32("1995-01-01") AND orders.o_orderdate <= Date32("1996-12-31") + TableScan: orders projection=[o_orderkey, o_custkey, o_orderdate], partial_filters=[orders.o_orderdate >= Date32("1995-01-01"), orders.o_orderdate <= Date32("1996-12-31")] TableScan: customer projection=[c_custkey, c_nationkey] SubqueryAlias: n1 TableScan: nation projection=[n_nationkey, n_regionkey] @@ -39,198 +39,198 @@ DataFusion Physical Plan ======================== SortPreservingMergeExec: [o_year@0 ASC NULLS LAST] - SortExec: expr=[o_year@0 ASC NULLS LAST] - ProjectionExec: 
expr=[o_year@0 as o_year, SUM(CASE WHEN all_nations.nation = Utf8("IRAQ") THEN all_nations.volume ELSE Int64(0) END)@1 / SUM(all_nations.volume)@2 as mkt_share] - AggregateExec: mode=FinalPartitioned, gby=[o_year@0 as o_year], aggr=[SUM(CASE WHEN all_nations.nation = Utf8("IRAQ") THEN all_nations.volume ELSE Int64(0) END), SUM(all_nations.volume)] + SortExec: expr=[o_year@0 ASC NULLS LAST], preserve_partitioning=[true] + ProjectionExec: expr=[o_year@0 as o_year, sum(CASE WHEN all_nations.nation = Utf8("IRAQ") THEN all_nations.volume ELSE Int64(0) END)@1 / sum(all_nations.volume)@2 as mkt_share] + AggregateExec: mode=FinalPartitioned, gby=[o_year@0 as o_year], aggr=[sum(CASE WHEN all_nations.nation = Utf8("IRAQ") THEN all_nations.volume ELSE Int64(0) END), sum(all_nations.volume)] CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "o_year", index: 0 }], 4), input_partitions=4 - AggregateExec: mode=Partial, gby=[o_year@0 as o_year], aggr=[SUM(CASE WHEN all_nations.nation = Utf8("IRAQ") THEN all_nations.volume ELSE Int64(0) END), SUM(all_nations.volume)] - ProjectionExec: expr=[datepart(YEAR, o_orderdate@2) as o_year, CAST(l_extendedprice@0 AS Decimal128(35, 4)) * CAST(Some(100),23,2 - CAST(l_discount@1 AS Decimal128(23, 2)) AS Decimal128(35, 4)) as volume, n_name@4 as nation] + RepartitionExec: partitioning=Hash([o_year@0], 4), input_partitions=4 + AggregateExec: mode=Partial, gby=[o_year@0 as o_year], aggr=[sum(CASE WHEN all_nations.nation = Utf8("IRAQ") THEN all_nations.volume ELSE Int64(0) END), sum(all_nations.volume)] + ProjectionExec: expr=[date_part(YEAR, o_orderdate@2) as o_year, l_extendedprice@0 * (Some(1),20,0 - l_discount@1) as volume, n_name@3 as nation] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "n_regionkey", index: 3 }, Column { name: "r_regionkey", index: 0 })] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(r_regionkey@0, 
n_regionkey@3)], projection=[l_extendedprice@1, l_discount@2, o_orderdate@3, n_name@5] CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "n_regionkey", index: 3 }], 4), input_partitions=4 - ProjectionExec: expr=[l_extendedprice@0 as l_extendedprice, l_discount@1 as l_discount, o_orderdate@3 as o_orderdate, n_regionkey@4 as n_regionkey, n_name@6 as n_name] + RepartitionExec: partitioning=Hash([r_regionkey@0], 4), input_partitions=4 + RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 + ProjectionExec: expr=[r_regionkey@0 as r_regionkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: r_name@1 = MIDDLE EAST + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/region.parquet/part1.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = MIDDLE EAST, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= MIDDLE EAST AND MIDDLE EAST <= r_name_max@1 END, required_guarantees=[r_name in (MIDDLE EAST)] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([n_regionkey@3], 4), input_partitions=4 + ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, o_orderdate@3 as o_orderdate, n_regionkey@4 as n_regionkey, n_name@0 as n_name] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "s_nationkey", index: 2 }, Column { name: "n_nationkey", index: 0 })] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[n_name@1, l_extendedprice@2, l_discount@3, o_orderdate@5, n_regionkey@6] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=1 + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name] 
CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "s_nationkey", index: 2 }], 4), input_partitions=4 - ProjectionExec: expr=[l_extendedprice@0 as l_extendedprice, l_discount@1 as l_discount, s_nationkey@2 as s_nationkey, o_orderdate@3 as o_orderdate, n_regionkey@6 as n_regionkey] + RepartitionExec: partitioning=Hash([s_nationkey@2], 4), input_partitions=4 + ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@3 as s_nationkey, o_orderdate@4 as o_orderdate, n_regionkey@0 as n_regionkey] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "c_nationkey", index: 4 }, Column { name: "n_nationkey", index: 0 })] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, c_nationkey@4)], projection=[n_regionkey@1, l_extendedprice@2, l_discount@3, s_nationkey@4, o_orderdate@5] CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "c_nationkey", index: 4 }], 4), input_partitions=4 - ProjectionExec: expr=[l_extendedprice@0 as l_extendedprice, l_discount@1 as l_discount, s_nationkey@2 as s_nationkey, o_orderdate@4 as o_orderdate, c_nationkey@6 as c_nationkey] + RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=1 + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_regionkey] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([c_nationkey@4], 4), input_partitions=4 + ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@3 as s_nationkey, o_orderdate@4 as o_orderdate, c_nationkey@0 as c_nationkey] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "o_custkey", index: 3 }, Column { name: "c_custkey", index: 0 })] + 
HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@3)], projection=[c_nationkey@1, l_extendedprice@2, l_discount@3, s_nationkey@4, o_orderdate@6] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([c_custkey@0], 4), input_partitions=4 + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:0..3391370], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:3391370..6782740], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:6782740..6784675, home/edmondo/Development/datafusion-ray/data/customer.parquet/part2.parquet:0..3389435], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part2.parquet:3389435..6780803]]}, projection=[c_custkey, c_nationkey] CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "o_custkey", index: 3 }], 4), input_partitions=4 - ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@3 as s_nationkey, o_custkey@5 as o_custkey, o_orderdate@6 as o_orderdate] + RepartitionExec: partitioning=Hash([o_custkey@3], 4), input_partitions=4 + ProjectionExec: expr=[l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@4 as s_nationkey, o_custkey@0 as o_custkey, o_orderdate@1 as o_orderdate] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "l_orderkey", index: 0 }, Column { name: "o_orderkey", index: 0 })] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_custkey@1, o_orderdate@2, l_extendedprice@4, l_discount@5, s_nationkey@6] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([o_orderkey@0], 4), input_partitions=4 + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: o_orderdate@2 >= 1995-01-01 AND o_orderdate@2 <= 
1996-12-31 + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1995-01-01 AND o_orderdate@4 <= 1996-12-31, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1995-01-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 <= 1996-12-31 END, required_guarantees=[] CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4), input_partitions=4 - ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@5 as s_nationkey] + RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4 + ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@0 as s_nationkey] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "l_suppkey", index: 1 }, Column { name: "s_suppkey", index: 0 })] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1)], projection=[s_nationkey@1, l_orderkey@2, l_extendedprice@4, l_discount@5] CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "l_suppkey", index: 1 }], 4), input_partitions=4 - ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_suppkey@3 as l_suppkey, 
l_extendedprice@4 as l_extendedprice, l_discount@5 as l_discount] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "p_partkey", index: 0 }, Column { name: "l_partkey", index: 1 })] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4), input_partitions=4 - ProjectionExec: expr=[p_partkey@0 as p_partkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: p_type@1 = LARGE PLATED STEEL - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/part.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/part.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/part.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/part.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-7.parquet, 
mnt/bigdata/tpch/sf10-parquet/part.parquet/part-3.parquet]]}, predicate=p_type@4 = LARGE PLATED STEEL, pruning_predicate=p_type_min@0 <= LARGE PLATED STEEL AND LARGE PLATED STEEL <= p_type_max@1, projection=[p_partkey, p_type] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "l_partkey", index: 1 }], 4), input_partitions=4 - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-3.parquet]]}, projection=[l_orderkey, l_partkey, 
l_suppkey, l_extendedprice, l_discount] + RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=4 + RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 + ParquetExec: file_groups={2 groups: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet/part1.parquet], [home/edmondo/Development/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4), input_partitions=4 - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-12.parquet, 
mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-3.parquet]]}, projection=[s_suppkey, s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4), input_partitions=4 - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: o_orderdate@2 >= 9131 AND o_orderdate@2 <= 9861 - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-7.parquet, 
mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-3.parquet]]}, predicate=o_orderdate@4 >= 9131 AND o_orderdate@4 <= 9861, pruning_predicate=o_orderdate_max@0 >= 9131 AND o_orderdate_min@1 <= 9861, projection=[o_orderkey, o_custkey, o_orderdate] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4), input_partitions=4 - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-3.parquet]]}, 
projection=[c_custkey, c_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4), input_partitions=1 - ParquetExec: limit=None, partitions={1 group: [[mnt/bigdata/tpch/sf10-parquet/nation.parquet/part-0.parquet]]}, projection=[n_nationkey, n_regionkey] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4), input_partitions=1 - ParquetExec: limit=None, partitions={1 group: [[mnt/bigdata/tpch/sf10-parquet/nation.parquet/part-0.parquet]]}, projection=[n_nationkey, n_name] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "r_regionkey", index: 0 }], 4), input_partitions=4 - RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 - ProjectionExec: expr=[r_regionkey@0 as r_regionkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: r_name@1 = MIDDLE EAST - ParquetExec: limit=None, partitions={1 group: [[mnt/bigdata/tpch/sf10-parquet/region.parquet/part-0.parquet]]}, predicate=r_name@1 = MIDDLE EAST, pruning_predicate=r_name_min@0 <= MIDDLE EAST AND MIDDLE EAST <= r_name_max@1, projection=[r_regionkey, r_name] + RepartitionExec: partitioning=Hash([l_suppkey@1], 4), input_partitions=4 + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, l_partkey@1)], projection=[l_orderkey@1, l_suppkey@3, l_extendedprice@4, l_discount@5] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([p_partkey@0], 4), input_partitions=4 + ProjectionExec: expr=[p_partkey@0 as p_partkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: p_type@1 = LARGE PLATED STEEL + RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 + ParquetExec: file_groups={2 groups: [[home/edmondo/Development/datafusion-ray/data/part.parquet/part1.parquet], 
[home/edmondo/Development/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_type], predicate=p_type@4 = LARGE PLATED STEEL, pruning_predicate=CASE WHEN p_type_null_count@2 = p_type_row_count@3 THEN false ELSE p_type_min@0 <= LARGE PLATED STEEL AND LARGE PLATED STEEL <= p_type_max@1 END, required_guarantees=[p_type in (LARGE PLATED STEEL)] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([l_partkey@1], 4), input_partitions=4 + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_partkey, l_suppkey, l_extendedprice, l_discount] RaySQL Plan =========== -Query Stage #0 (4 -> 4): -ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) - ProjectionExec: expr=[p_partkey@0 as p_partkey] +Query Stage #0 (1 -> 4): +ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "r_regionkey", index: 0 }], 4)) + ProjectionExec: expr=[r_regionkey@0 as r_regionkey] CoalesceBatchesExec: target_batch_size=8192 - FilterExec: p_type@1 = LARGE PLATED STEEL - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/part.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-14.parquet], 
[mnt/bigdata/tpch/sf10-parquet/part.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/part.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/part.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-3.parquet]]}, predicate=p_type@4 = LARGE PLATED STEEL, pruning_predicate=p_type_min@0 <= LARGE PLATED STEEL AND LARGE PLATED STEEL <= p_type_max@1, projection=[p_partkey, p_type] + FilterExec: r_name@1 = MIDDLE EAST + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/region.parquet/part1.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = MIDDLE EAST, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= MIDDLE EAST AND MIDDLE EAST <= r_name_max@1 END, required_guarantees=[r_name in (MIDDLE EAST)] -Query Stage #1 (4 -> 4): -ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "l_partkey", index: 1 }], 4)) - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-10.parquet, 
mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-3.parquet]]}, projection=[l_orderkey, l_partkey, l_suppkey, l_extendedprice, l_discount] +Query Stage #1 (1 -> 4): +ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name] -Query Stage #2 (4 -> 4): -ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "l_suppkey", index: 1 }], 4)) - ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_suppkey@3 as l_suppkey, l_extendedprice@4 as 
l_extendedprice, l_discount@5 as l_discount] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "p_partkey", index: 0 }, Column { name: "l_partkey", index: 1 })] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "l_partkey", index: 1 }], 4)) +Query Stage #2 (1 -> 4): +ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_regionkey] Query Stage #3 (4 -> 4): -ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-4.parquet, 
mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-3.parquet]]}, projection=[s_suppkey, s_nationkey] +ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:0..3391370], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:3391370..6782740], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:6782740..6784675, home/edmondo/Development/datafusion-ray/data/customer.parquet/part2.parquet:0..3389435], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part2.parquet:3389435..6780803]]}, projection=[c_custkey, c_nationkey] Query Stage #4 (4 -> 4): -ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) - ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@5 as s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "l_suppkey", index: 1 }, Column { name: "s_suppkey", index: 0 })] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "l_suppkey", index: 1 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "s_suppkey", 
index: 0 }], 4)) - -Query Stage #5 (4 -> 4): -ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) +ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) CoalesceBatchesExec: target_batch_size=8192 - FilterExec: o_orderdate@2 >= 9131 AND o_orderdate@2 <= 9861 - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-3.parquet]]}, predicate=o_orderdate@4 >= 9131 AND o_orderdate@4 <= 9861, pruning_predicate=o_orderdate_max@0 >= 9131 
AND o_orderdate_min@1 <= 9861, projection=[o_orderkey, o_custkey, o_orderdate] + FilterExec: o_orderdate@2 >= 1995-01-01 AND o_orderdate@2 <= 1996-12-31 + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1995-01-01 AND o_orderdate@4 <= 1996-12-31, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1995-01-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 <= 1996-12-31 END, required_guarantees=[] + +Query Stage #5 (2 -> 4): +ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) + ParquetExec: file_groups={2 groups: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet/part1.parquet], [home/edmondo/Development/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] -Query Stage #6 (4 -> 4): -ShuffleWriterExec(stage_id=6, output_partitioning=Hash([Column { name: "o_custkey", index: 3 }], 4)) - ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@3 as s_nationkey, o_custkey@5 as o_custkey, o_orderdate@6 as o_orderdate] +Query Stage #6 (2 -> 4): +ShuffleWriterExec(stage_id=6, output_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) + ProjectionExec: expr=[p_partkey@0 as p_partkey] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, 
on=[(Column { name: "l_orderkey", index: 0 }, Column { name: "o_orderkey", index: 0 })] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=4, input_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=5, input_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) + FilterExec: p_type@1 = LARGE PLATED STEEL + ParquetExec: file_groups={2 groups: [[home/edmondo/Development/datafusion-ray/data/part.parquet/part1.parquet], [home/edmondo/Development/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_type], predicate=p_type@4 = LARGE PLATED STEEL, pruning_predicate=CASE WHEN p_type_null_count@2 = p_type_row_count@3 THEN false ELSE p_type_min@0 <= LARGE PLATED STEEL AND LARGE PLATED STEEL <= p_type_max@1 END, required_guarantees=[p_type in (LARGE PLATED STEEL)] Query Stage #7 (4 -> 4): -ShuffleWriterExec(stage_id=7, output_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-23.parquet, 
mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/customer.parquet/part-3.parquet]]}, projection=[c_custkey, c_nationkey] +ShuffleWriterExec(stage_id=7, output_partitioning=Hash([Column { name: "l_partkey", index: 1 }], 4)) + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_partkey, l_suppkey, l_extendedprice, l_discount] Query Stage #8 (4 -> 4): -ShuffleWriterExec(stage_id=8, output_partitioning=Hash([Column { name: "c_nationkey", index: 4 }], 4)) - ProjectionExec: expr=[l_extendedprice@0 as l_extendedprice, l_discount@1 as l_discount, s_nationkey@2 as s_nationkey, o_orderdate@4 as o_orderdate, c_nationkey@6 as c_nationkey] +ShuffleWriterExec(stage_id=8, output_partitioning=Hash([Column { name: "l_suppkey", index: 1 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, l_partkey@1)], projection=[l_orderkey@1, l_suppkey@3, l_extendedprice@4, l_discount@5] + CoalesceBatchesExec: 
target_batch_size=8192 + ShuffleReaderExec(stage_id=6, input_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=7, input_partitioning=Hash([Column { name: "l_partkey", index: 1 }], 4)) + +Query Stage #9 (4 -> 4): +ShuffleWriterExec(stage_id=9, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) + ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@0 as s_nationkey] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "o_custkey", index: 3 }, Column { name: "c_custkey", index: 0 })] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1)], projection=[s_nationkey@1, l_orderkey@2, l_extendedprice@4, l_discount@5] CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=6, input_partitioning=Hash([Column { name: "o_custkey", index: 3 }], 4)) + ShuffleReaderExec(stage_id=5, input_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=7, input_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) - -Query Stage #9 (1 -> 4): -ShuffleWriterExec(stage_id=9, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) - ParquetExec: limit=None, partitions={1 group: [[mnt/bigdata/tpch/sf10-parquet/nation.parquet/part-0.parquet]]}, projection=[n_nationkey, n_regionkey] + ShuffleReaderExec(stage_id=8, input_partitioning=Hash([Column { name: "l_suppkey", index: 1 }], 4)) Query Stage #10 (4 -> 4): -ShuffleWriterExec(stage_id=10, output_partitioning=Hash([Column { name: "s_nationkey", index: 2 }], 4)) - ProjectionExec: expr=[l_extendedprice@0 as l_extendedprice, l_discount@1 as l_discount, s_nationkey@2 as s_nationkey, o_orderdate@3 as o_orderdate, n_regionkey@6 as n_regionkey] 
+ShuffleWriterExec(stage_id=10, output_partitioning=Hash([Column { name: "o_custkey", index: 3 }], 4)) + ProjectionExec: expr=[l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@4 as s_nationkey, o_custkey@0 as o_custkey, o_orderdate@1 as o_orderdate] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "c_nationkey", index: 4 }, Column { name: "n_nationkey", index: 0 })] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_custkey@1, o_orderdate@2, l_extendedprice@4, l_discount@5, s_nationkey@6] CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=8, input_partitioning=Hash([Column { name: "c_nationkey", index: 4 }], 4)) + ShuffleReaderExec(stage_id=4, input_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=9, input_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) + ShuffleReaderExec(stage_id=9, input_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) -Query Stage #11 (1 -> 4): -ShuffleWriterExec(stage_id=11, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) - ParquetExec: limit=None, partitions={1 group: [[mnt/bigdata/tpch/sf10-parquet/nation.parquet/part-0.parquet]]}, projection=[n_nationkey, n_name] +Query Stage #11 (4 -> 4): +ShuffleWriterExec(stage_id=11, output_partitioning=Hash([Column { name: "c_nationkey", index: 4 }], 4)) + ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@3 as s_nationkey, o_orderdate@4 as o_orderdate, c_nationkey@0 as c_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@3)], projection=[c_nationkey@1, l_extendedprice@2, l_discount@3, s_nationkey@4, o_orderdate@6] + CoalesceBatchesExec: target_batch_size=8192 + 
ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=10, input_partitioning=Hash([Column { name: "o_custkey", index: 3 }], 4)) Query Stage #12 (4 -> 4): -ShuffleWriterExec(stage_id=12, output_partitioning=Hash([Column { name: "n_regionkey", index: 3 }], 4)) - ProjectionExec: expr=[l_extendedprice@0 as l_extendedprice, l_discount@1 as l_discount, o_orderdate@3 as o_orderdate, n_regionkey@4 as n_regionkey, n_name@6 as n_name] +ShuffleWriterExec(stage_id=12, output_partitioning=Hash([Column { name: "s_nationkey", index: 2 }], 4)) + ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@3 as s_nationkey, o_orderdate@4 as o_orderdate, n_regionkey@0 as n_regionkey] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "s_nationkey", index: 2 }, Column { name: "n_nationkey", index: 0 })] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, c_nationkey@4)], projection=[n_regionkey@1, l_extendedprice@2, l_discount@3, s_nationkey@4, o_orderdate@5] CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=10, input_partitioning=Hash([Column { name: "s_nationkey", index: 2 }], 4)) + ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=11, input_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) + ShuffleReaderExec(stage_id=11, input_partitioning=Hash([Column { name: "c_nationkey", index: 4 }], 4)) -Query Stage #13 (1 -> 4): -ShuffleWriterExec(stage_id=13, output_partitioning=Hash([Column { name: "r_regionkey", index: 0 }], 4)) - ProjectionExec: expr=[r_regionkey@0 as r_regionkey] +Query Stage #13 (4 -> 4): +ShuffleWriterExec(stage_id=13, output_partitioning=Hash([Column { name: 
"n_regionkey", index: 3 }], 4)) + ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, o_orderdate@3 as o_orderdate, n_regionkey@4 as n_regionkey, n_name@0 as n_name] CoalesceBatchesExec: target_batch_size=8192 - FilterExec: r_name@1 = MIDDLE EAST - ParquetExec: limit=None, partitions={1 group: [[mnt/bigdata/tpch/sf10-parquet/region.parquet/part-0.parquet]]}, predicate=r_name@1 = MIDDLE EAST, pruning_predicate=r_name_min@0 <= MIDDLE EAST AND MIDDLE EAST <= r_name_max@1, projection=[r_regionkey, r_name] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[n_name@1, l_extendedprice@2, l_discount@3, o_orderdate@5, n_regionkey@6] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=12, input_partitioning=Hash([Column { name: "s_nationkey", index: 2 }], 4)) Query Stage #14 (4 -> 4): ShuffleWriterExec(stage_id=14, output_partitioning=Hash([Column { name: "o_year", index: 0 }], 4)) - AggregateExec: mode=Partial, gby=[o_year@0 as o_year], aggr=[SUM(CASE WHEN all_nations.nation = Utf8("IRAQ") THEN all_nations.volume ELSE Int64(0) END), SUM(all_nations.volume)] - ProjectionExec: expr=[datepart(YEAR, o_orderdate@2) as o_year, CAST(l_extendedprice@0 AS Decimal128(35, 4)) * CAST(Some(100),23,2 - CAST(l_discount@1 AS Decimal128(23, 2)) AS Decimal128(35, 4)) as volume, n_name@4 as nation] + AggregateExec: mode=Partial, gby=[o_year@0 as o_year], aggr=[sum(CASE WHEN all_nations.nation = Utf8("IRAQ") THEN all_nations.volume ELSE Int64(0) END), sum(all_nations.volume)] + ProjectionExec: expr=[date_part(YEAR, o_orderdate@2) as o_year, l_extendedprice@0 * (Some(1),20,0 - l_discount@1) as volume, n_name@3 as nation] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: 
"n_regionkey", index: 3 }, Column { name: "r_regionkey", index: 0 })] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(r_regionkey@0, n_regionkey@3)], projection=[l_extendedprice@1, l_discount@2, o_orderdate@3, n_name@5] CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=12, input_partitioning=Hash([Column { name: "n_regionkey", index: 3 }], 4)) + ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "r_regionkey", index: 0 }], 4)) CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=13, input_partitioning=Hash([Column { name: "r_regionkey", index: 0 }], 4)) + ShuffleReaderExec(stage_id=13, input_partitioning=Hash([Column { name: "n_regionkey", index: 3 }], 4)) Query Stage #15 (4 -> 4): ShuffleWriterExec(stage_id=15, output_partitioning=Hash([Column { name: "o_year", index: 0 }], 4)) - SortExec: expr=[o_year@0 ASC NULLS LAST] - ProjectionExec: expr=[o_year@0 as o_year, SUM(CASE WHEN all_nations.nation = Utf8("IRAQ") THEN all_nations.volume ELSE Int64(0) END)@1 / SUM(all_nations.volume)@2 as mkt_share] - AggregateExec: mode=FinalPartitioned, gby=[o_year@0 as o_year], aggr=[SUM(CASE WHEN all_nations.nation = Utf8("IRAQ") THEN all_nations.volume ELSE Int64(0) END), SUM(all_nations.volume)] + SortExec: expr=[o_year@0 ASC NULLS LAST], preserve_partitioning=[true] + ProjectionExec: expr=[o_year@0 as o_year, sum(CASE WHEN all_nations.nation = Utf8("IRAQ") THEN all_nations.volume ELSE Int64(0) END)@1 / sum(all_nations.volume)@2 as mkt_share] + AggregateExec: mode=FinalPartitioned, gby=[o_year@0 as o_year], aggr=[sum(CASE WHEN all_nations.nation = Utf8("IRAQ") THEN all_nations.volume ELSE Int64(0) END), sum(all_nations.volume)] CoalesceBatchesExec: target_batch_size=8192 ShuffleReaderExec(stage_id=14, input_partitioning=Hash([Column { name: "o_year", index: 0 }], 4)) diff --git a/testdata/expected-plans/q9.txt b/testdata/expected-plans/q9.txt index 5afd3dc..e13b865 100644 --- a/testdata/expected-plans/q9.txt 
+++ b/testdata/expected-plans/q9.txt @@ -2,10 +2,10 @@ DataFusion Logical Plan ======================= Sort: profit.nation ASC NULLS LAST, profit.o_year DESC NULLS FIRST - Projection: profit.nation, profit.o_year, SUM(profit.amount) AS sum_profit - Aggregate: groupBy=[[profit.nation, profit.o_year]], aggr=[[SUM(profit.amount)]] + Projection: profit.nation, profit.o_year, sum(profit.amount) AS sum_profit + Aggregate: groupBy=[[profit.nation, profit.o_year]], aggr=[[sum(profit.amount)]] SubqueryAlias: profit - Projection: nation.n_name AS nation, datepart(Utf8("YEAR"), orders.o_orderdate) AS o_year, CAST(CAST(lineitem.l_extendedprice AS Decimal128(35, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(35, 4)) AS Decimal128(36, 4)) - CAST(partsupp.ps_supplycost * lineitem.l_quantity AS Decimal128(36, 4)) AS amount + Projection: nation.n_name AS nation, date_part(Utf8("YEAR"), orders.o_orderdate) AS o_year, lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - lineitem.l_discount) - partsupp.ps_supplycost * lineitem.l_quantity AS amount Inner Join: supplier.s_nationkey = nation.n_nationkey Projection: lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount, supplier.s_nationkey, partsupp.ps_supplycost, orders.o_orderdate Inner Join: lineitem.l_orderkey = orders.o_orderkey @@ -28,143 +28,143 @@ DataFusion Physical Plan ======================== SortPreservingMergeExec: [nation@0 ASC NULLS LAST,o_year@1 DESC] - SortExec: expr=[nation@0 ASC NULLS LAST,o_year@1 DESC] - ProjectionExec: expr=[nation@0 as nation, o_year@1 as o_year, SUM(profit.amount)@2 as sum_profit] - AggregateExec: mode=FinalPartitioned, gby=[nation@0 as nation, o_year@1 as o_year], aggr=[SUM(profit.amount)] + SortExec: expr=[nation@0 ASC NULLS LAST,o_year@1 DESC], preserve_partitioning=[true] + ProjectionExec: expr=[nation@0 as nation, o_year@1 as o_year, sum(profit.amount)@2 as sum_profit] + AggregateExec: mode=FinalPartitioned, gby=[nation@0 
as nation, o_year@1 as o_year], aggr=[sum(profit.amount)] CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "nation", index: 0 }, Column { name: "o_year", index: 1 }], 4), input_partitions=4 - AggregateExec: mode=Partial, gby=[nation@0 as nation, o_year@1 as o_year], aggr=[SUM(profit.amount)] - ProjectionExec: expr=[n_name@7 as nation, datepart(YEAR, o_orderdate@5) as o_year, CAST(CAST(l_extendedprice@1 AS Decimal128(35, 4)) * CAST(Some(100),23,2 - CAST(l_discount@2 AS Decimal128(23, 2)) AS Decimal128(35, 4)) AS Decimal128(36, 4)) - CAST(ps_supplycost@4 * l_quantity@0 AS Decimal128(36, 4)) as amount] + RepartitionExec: partitioning=Hash([nation@0, o_year@1], 4), input_partitions=4 + AggregateExec: mode=Partial, gby=[nation@0 as nation, o_year@1 as o_year], aggr=[sum(profit.amount)] + ProjectionExec: expr=[n_name@0 as nation, date_part(YEAR, o_orderdate@5) as o_year, l_extendedprice@2 * (Some(1),20,0 - l_discount@3) - ps_supplycost@4 * l_quantity@1 as amount] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "s_nationkey", index: 3 }, Column { name: "n_nationkey", index: 0 })] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@3)], projection=[n_name@1, l_quantity@2, l_extendedprice@3, l_discount@4, ps_supplycost@6, o_orderdate@7] CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "s_nationkey", index: 3 }], 4), input_partitions=4 - ProjectionExec: expr=[l_quantity@1 as l_quantity, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@4 as s_nationkey, ps_supplycost@5 as ps_supplycost, o_orderdate@7 as o_orderdate] + RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=1 + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name] + 
CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([s_nationkey@3], 4), input_partitions=4 + ProjectionExec: expr=[l_quantity@1 as l_quantity, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@4 as s_nationkey, ps_supplycost@5 as ps_supplycost, o_orderdate@0 as o_orderdate] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "l_orderkey", index: 0 }, Column { name: "o_orderkey", index: 0 })] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_orderdate@1, l_quantity@3, l_extendedprice@4, l_discount@5, s_nationkey@6, ps_supplycost@7] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([o_orderkey@0], 4), input_partitions=4 + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_orderdate] CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4), input_partitions=4 - ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_quantity@3 as l_quantity, l_extendedprice@4 as l_extendedprice, l_discount@5 as l_discount, s_nationkey@6 as s_nationkey, ps_supplycost@9 as ps_supplycost] + RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4 + ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_quantity@2 as l_quantity, l_extendedprice@3 as l_extendedprice, l_discount@4 as l_discount, s_nationkey@5 as s_nationkey, ps_supplycost@0 as 
ps_supplycost] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "l_suppkey", index: 2 }, Column { name: "ps_suppkey", index: 1 }), (Column { name: "l_partkey", index: 1 }, Column { name: "ps_partkey", index: 0 })] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(ps_suppkey@1, l_suppkey@2), (ps_partkey@0, l_partkey@1)], projection=[ps_supplycost@2, l_orderkey@3, l_quantity@6, l_extendedprice@7, l_discount@8, s_nationkey@9] CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "l_suppkey", index: 2 }, Column { name: "l_partkey", index: 1 }], 4), input_partitions=4 - ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_partkey@1 as l_partkey, l_suppkey@2 as l_suppkey, l_quantity@3 as l_quantity, l_extendedprice@4 as l_extendedprice, l_discount@5 as l_discount, s_nationkey@7 as s_nationkey] + RepartitionExec: partitioning=Hash([ps_suppkey@1, ps_partkey@0], 4), input_partitions=4 + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10449977], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part1.parquet:10449977..20896603, home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:0..3351], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:3351..10453328], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:10453328..20903305]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([l_suppkey@2, l_partkey@1], 4), input_partitions=4 + ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_partkey@2 as l_partkey, l_suppkey@3 as l_suppkey, l_quantity@4 as l_quantity, l_extendedprice@5 as l_extendedprice, l_discount@6 as l_discount, s_nationkey@0 as s_nationkey] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: 
mode=Partitioned, join_type=Inner, on=[(Column { name: "l_suppkey", index: 2 }, Column { name: "s_suppkey", index: 0 })] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, l_suppkey@2)], projection=[s_nationkey@1, l_orderkey@2, l_partkey@3, l_suppkey@4, l_quantity@5, l_extendedprice@6, l_discount@7] CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "l_suppkey", index: 2 }], 4), input_partitions=4 - ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_partkey@2 as l_partkey, l_suppkey@3 as l_suppkey, l_quantity@4 as l_quantity, l_extendedprice@5 as l_extendedprice, l_discount@6 as l_discount] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "p_partkey", index: 0 }, Column { name: "l_partkey", index: 1 })] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4), input_partitions=4 - ProjectionExec: expr=[p_partkey@0 as p_partkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: p_name@1 LIKE %moccasin% - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/part.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/part.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/part.parquet/part-21.parquet, 
mnt/bigdata/tpch/sf10-parquet/part.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/part.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-3.parquet]]}, predicate=p_name@1 LIKE %moccasin%, projection=[p_partkey, p_name] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "l_partkey", index: 1 }], 4), input_partitions=4 - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-17.parquet, 
mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-3.parquet]]}, projection=[l_orderkey, l_partkey, l_suppkey, l_quantity, l_extendedprice, l_discount] + RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=4 + RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 + ParquetExec: file_groups={2 groups: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet/part1.parquet], [home/edmondo/Development/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4), input_partitions=4 - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-16.parquet], 
[mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-3.parquet]]}, projection=[s_suppkey, s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "ps_suppkey", index: 1 }, Column { name: "ps_partkey", index: 0 }], 4), input_partitions=4 - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-23.parquet, 
mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-3.parquet]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4), input_partitions=4 - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-9.parquet, 
mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-3.parquet]]}, projection=[o_orderkey, o_orderdate] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4), input_partitions=1 - ParquetExec: limit=None, partitions={1 group: [[mnt/bigdata/tpch/sf10-parquet/nation.parquet/part-0.parquet]]}, projection=[n_nationkey, n_name] + RepartitionExec: partitioning=Hash([l_suppkey@2], 4), input_partitions=4 + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, l_partkey@1)], projection=[l_orderkey@1, l_partkey@2, l_suppkey@3, l_quantity@4, l_extendedprice@5, l_discount@6] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([p_partkey@0], 4), input_partitions=4 + ProjectionExec: expr=[p_partkey@0 as p_partkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: p_name@1 LIKE %moccasin% + RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 + ParquetExec: file_groups={2 groups: [[home/edmondo/Development/datafusion-ray/data/part.parquet/part1.parquet], [home/edmondo/Development/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_name], predicate=p_name@1 LIKE %moccasin% + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([l_partkey@1], 4), input_partitions=4 + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], 
[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_partkey, l_suppkey, l_quantity, l_extendedprice, l_discount] RaySQL Plan =========== -Query Stage #0 (4 -> 4): -ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) - ProjectionExec: expr=[p_partkey@0 as p_partkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: p_name@1 LIKE %moccasin% - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/part.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/part.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/part.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/part.parquet/part-18.parquet, 
mnt/bigdata/tpch/sf10-parquet/part.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/part.parquet/part-3.parquet]]}, predicate=p_name@1 LIKE %moccasin%, projection=[p_partkey, p_name] +Query Stage #0 (1 -> 4): +ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name] Query Stage #1 (4 -> 4): -ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "l_partkey", index: 1 }], 4)) - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-1.parquet], 
[mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/lineitem.parquet/part-3.parquet]]}, projection=[l_orderkey, l_partkey, l_suppkey, l_quantity, l_extendedprice, l_discount] +ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_orderdate] Query Stage #2 (4 -> 4): -ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "l_suppkey", index: 2 }], 4)) - ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_partkey@2 as l_partkey, l_suppkey@3 as l_suppkey, l_quantity@4 as l_quantity, l_extendedprice@5 as l_extendedprice, l_discount@6 as l_discount] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "p_partkey", index: 0 }, Column { name: "l_partkey", index: 1 })] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "l_partkey", index: 1 }], 4)) +ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: 
"ps_suppkey", index: 1 }, Column { name: "ps_partkey", index: 0 }], 4)) + ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10449977], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part1.parquet:10449977..20896603, home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:0..3351], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:3351..10453328], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:10453328..20903305]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] -Query Stage #3 (4 -> 4): +Query Stage #3 (2 -> 4): ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-1.parquet], 
[mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/supplier.parquet/part-3.parquet]]}, projection=[s_suppkey, s_nationkey] + ParquetExec: file_groups={2 groups: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet/part1.parquet], [home/edmondo/Development/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] -Query Stage #4 (4 -> 4): -ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "l_suppkey", index: 2 }, Column { name: "l_partkey", index: 1 }], 4)) - ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_partkey@1 as l_partkey, l_suppkey@2 as l_suppkey, l_quantity@3 as l_quantity, l_extendedprice@4 as l_extendedprice, l_discount@5 as l_discount, s_nationkey@7 as s_nationkey] +Query Stage #4 (2 -> 4): +ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) + ProjectionExec: expr=[p_partkey@0 as p_partkey] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "l_suppkey", index: 2 }, Column { name: "s_suppkey", index: 0 })] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "l_suppkey", index: 2 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) + FilterExec: p_name@1 LIKE %moccasin% + ParquetExec: file_groups={2 groups: [[home/edmondo/Development/datafusion-ray/data/part.parquet/part1.parquet], [home/edmondo/Development/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_name], predicate=p_name@1 LIKE 
%moccasin% Query Stage #5 (4 -> 4): -ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "ps_suppkey", index: 1 }, Column { name: "ps_partkey", index: 0 }], 4)) - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-4.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/partsupp.parquet/part-3.parquet]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] +ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "l_partkey", index: 1 }], 4)) + ParquetExec: file_groups={4 groups: 
[[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_partkey, l_suppkey, l_quantity, l_extendedprice, l_discount] Query Stage #6 (4 -> 4): -ShuffleWriterExec(stage_id=6, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) - ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_quantity@3 as l_quantity, l_extendedprice@4 as l_extendedprice, l_discount@5 as l_discount, s_nationkey@6 as s_nationkey, ps_supplycost@9 as ps_supplycost] +ShuffleWriterExec(stage_id=6, output_partitioning=Hash([Column { name: "l_suppkey", index: 2 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, l_partkey@1)], projection=[l_orderkey@1, l_partkey@2, l_suppkey@3, l_quantity@4, l_extendedprice@5, l_discount@6] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=4, input_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=5, input_partitioning=Hash([Column { name: "l_partkey", index: 1 }], 4)) + +Query Stage #7 (4 -> 4): +ShuffleWriterExec(stage_id=7, output_partitioning=Hash([Column { name: "l_suppkey", index: 2 }, Column { name: "l_partkey", index: 1 }], 4)) + ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_partkey@2 as l_partkey, l_suppkey@3 as l_suppkey, l_quantity@4 as l_quantity, l_extendedprice@5 as l_extendedprice, l_discount@6 as l_discount, s_nationkey@0 as s_nationkey] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: 
mode=Partitioned, join_type=Inner, on=[(Column { name: "l_suppkey", index: 2 }, Column { name: "ps_suppkey", index: 1 }), (Column { name: "l_partkey", index: 1 }, Column { name: "ps_partkey", index: 0 })] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, l_suppkey@2)], projection=[s_nationkey@1, l_orderkey@2, l_partkey@3, l_suppkey@4, l_quantity@5, l_extendedprice@6, l_discount@7] CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=4, input_partitioning=Hash([Column { name: "l_suppkey", index: 2 }, Column { name: "l_partkey", index: 1 }], 4)) + ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=5, input_partitioning=Hash([Column { name: "ps_suppkey", index: 1 }, Column { name: "ps_partkey", index: 0 }], 4)) - -Query Stage #7 (4 -> 4): -ShuffleWriterExec(stage_id=7, output_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) - ParquetExec: limit=None, partitions={4 groups: [[mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-5.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-10.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-11.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-13.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-6.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-14.parquet], [mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-20.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-2.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-22.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-19.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-0.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-16.parquet], [mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-21.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-23.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-4.parquet, 
mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-17.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-9.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-1.parquet], [mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-18.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-8.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-12.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-15.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-7.parquet, mnt/bigdata/tpch/sf10-parquet/orders.parquet/part-3.parquet]]}, projection=[o_orderkey, o_orderdate] + ShuffleReaderExec(stage_id=6, input_partitioning=Hash([Column { name: "l_suppkey", index: 2 }], 4)) Query Stage #8 (4 -> 4): -ShuffleWriterExec(stage_id=8, output_partitioning=Hash([Column { name: "s_nationkey", index: 3 }], 4)) - ProjectionExec: expr=[l_quantity@1 as l_quantity, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@4 as s_nationkey, ps_supplycost@5 as ps_supplycost, o_orderdate@7 as o_orderdate] +ShuffleWriterExec(stage_id=8, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) + ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_quantity@2 as l_quantity, l_extendedprice@3 as l_extendedprice, l_discount@4 as l_discount, s_nationkey@5 as s_nationkey, ps_supplycost@0 as ps_supplycost] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "l_orderkey", index: 0 }, Column { name: "o_orderkey", index: 0 })] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(ps_suppkey@1, l_suppkey@2), (ps_partkey@0, l_partkey@1)], projection=[ps_supplycost@2, l_orderkey@3, l_quantity@6, l_extendedprice@7, l_discount@8, s_nationkey@9] CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=6, input_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) + ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "ps_suppkey", index: 1 }, 
Column { name: "ps_partkey", index: 0 }], 4)) CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=7, input_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) + ShuffleReaderExec(stage_id=7, input_partitioning=Hash([Column { name: "l_suppkey", index: 2 }, Column { name: "l_partkey", index: 1 }], 4)) -Query Stage #9 (1 -> 4): -ShuffleWriterExec(stage_id=9, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) - ParquetExec: limit=None, partitions={1 group: [[mnt/bigdata/tpch/sf10-parquet/nation.parquet/part-0.parquet]]}, projection=[n_nationkey, n_name] +Query Stage #9 (4 -> 4): +ShuffleWriterExec(stage_id=9, output_partitioning=Hash([Column { name: "s_nationkey", index: 3 }], 4)) + ProjectionExec: expr=[l_quantity@1 as l_quantity, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@4 as s_nationkey, ps_supplycost@5 as ps_supplycost, o_orderdate@0 as o_orderdate] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_orderdate@1, l_quantity@3, l_extendedprice@4, l_discount@5, s_nationkey@6, ps_supplycost@7] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=8, input_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) Query Stage #10 (4 -> 4): ShuffleWriterExec(stage_id=10, output_partitioning=Hash([Column { name: "nation", index: 0 }, Column { name: "o_year", index: 1 }], 4)) - AggregateExec: mode=Partial, gby=[nation@0 as nation, o_year@1 as o_year], aggr=[SUM(profit.amount)] - ProjectionExec: expr=[n_name@7 as nation, datepart(YEAR, o_orderdate@5) as o_year, CAST(CAST(l_extendedprice@1 AS Decimal128(35, 4)) * CAST(Some(100),23,2 - CAST(l_discount@2 AS Decimal128(23, 2)) AS Decimal128(35, 4)) AS Decimal128(36, 4)) - 
CAST(ps_supplycost@4 * l_quantity@0 AS Decimal128(36, 4)) as amount] + AggregateExec: mode=Partial, gby=[nation@0 as nation, o_year@1 as o_year], aggr=[sum(profit.amount)] + ProjectionExec: expr=[n_name@0 as nation, date_part(YEAR, o_orderdate@5) as o_year, l_extendedprice@2 * (Some(1),20,0 - l_discount@3) - ps_supplycost@4 * l_quantity@1 as amount] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "s_nationkey", index: 3 }, Column { name: "n_nationkey", index: 0 })] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@3)], projection=[n_name@1, l_quantity@2, l_extendedprice@3, l_discount@4, ps_supplycost@6, o_orderdate@7] CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=8, input_partitioning=Hash([Column { name: "s_nationkey", index: 3 }], 4)) + ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=9, input_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) + ShuffleReaderExec(stage_id=9, input_partitioning=Hash([Column { name: "s_nationkey", index: 3 }], 4)) Query Stage #11 (4 -> 4): ShuffleWriterExec(stage_id=11, output_partitioning=Hash([Column { name: "nation", index: 0 }, Column { name: "o_year", index: 1 }], 4)) - SortExec: expr=[nation@0 ASC NULLS LAST,o_year@1 DESC] - ProjectionExec: expr=[nation@0 as nation, o_year@1 as o_year, SUM(profit.amount)@2 as sum_profit] - AggregateExec: mode=FinalPartitioned, gby=[nation@0 as nation, o_year@1 as o_year], aggr=[SUM(profit.amount)] + SortExec: expr=[nation@0 ASC NULLS LAST,o_year@1 DESC], preserve_partitioning=[true] + ProjectionExec: expr=[nation@0 as nation, o_year@1 as o_year, sum(profit.amount)@2 as sum_profit] + AggregateExec: mode=FinalPartitioned, gby=[nation@0 as nation, o_year@1 as o_year], aggr=[sum(profit.amount)] CoalesceBatchesExec: 
target_batch_size=8192 ShuffleReaderExec(stage_id=10, input_partitioning=Hash([Column { name: "nation", index: 0 }, Column { name: "o_year", index: 1 }], 4)) diff --git a/tpch/requirements.txt b/tpch/requirements.txt new file mode 100644 index 0000000..2d257db --- /dev/null +++ b/tpch/requirements.txt @@ -0,0 +1,4 @@ +# This is a bad idea, we should lock dependencies with poetry and consume this tool as an action +pyarrow +datafusion +argparse diff --git a/tpch/tpchgen.py b/tpch/tpchgen.py new file mode 100644 index 0000000..e1822ab --- /dev/null +++ b/tpch/tpchgen.py @@ -0,0 +1,264 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +import argparse +import concurrent.futures +from datafusion import SessionContext +import os +import pyarrow +import subprocess +import time + +table_names = [ + "customer", + "lineitem", + "nation", + "orders", + "part", + "partsupp", + "region", + "supplier", +] + +# schema definition copied from DataFusion Python tpch example +all_schemas = {} + +all_schemas["customer"] = [ + ("C_CUSTKEY", pyarrow.int64()), + ("C_NAME", pyarrow.string()), + ("C_ADDRESS", pyarrow.string()), + ("C_NATIONKEY", pyarrow.int64()), + ("C_PHONE", pyarrow.string()), + ("C_ACCTBAL", pyarrow.decimal128(11, 2)), + ("C_MKTSEGMENT", pyarrow.string()), + ("C_COMMENT", pyarrow.string()), +] + +all_schemas["lineitem"] = [ + ("L_ORDERKEY", pyarrow.int64()), + ("L_PARTKEY", pyarrow.int64()), + ("L_SUPPKEY", pyarrow.int64()), + ("L_LINENUMBER", pyarrow.int32()), + ("L_QUANTITY", pyarrow.decimal128(11, 2)), + ("L_EXTENDEDPRICE", pyarrow.decimal128(11, 2)), + ("L_DISCOUNT", pyarrow.decimal128(11, 2)), + ("L_TAX", pyarrow.decimal128(11, 2)), + ("L_RETURNFLAG", pyarrow.string()), + ("L_LINESTATUS", pyarrow.string()), + ("L_SHIPDATE", pyarrow.date32()), + ("L_COMMITDATE", pyarrow.date32()), + ("L_RECEIPTDATE", pyarrow.date32()), + ("L_SHIPINSTRUCT", pyarrow.string()), + ("L_SHIPMODE", pyarrow.string()), + ("L_COMMENT", pyarrow.string()), +] + +all_schemas["nation"] = [ + ("N_NATIONKEY", pyarrow.int64()), + ("N_NAME", pyarrow.string()), + ("N_REGIONKEY", pyarrow.int64()), + ("N_COMMENT", pyarrow.string()), +] + +all_schemas["orders"] = [ + ("O_ORDERKEY", pyarrow.int64()), + ("O_CUSTKEY", pyarrow.int64()), + ("O_ORDERSTATUS", pyarrow.string()), + ("O_TOTALPRICE", pyarrow.decimal128(11, 2)), + ("O_ORDERDATE", pyarrow.date32()), + ("O_ORDERPRIORITY", pyarrow.string()), + ("O_CLERK", pyarrow.string()), + ("O_SHIPPRIORITY", pyarrow.int32()), + ("O_COMMENT", pyarrow.string()), +] + +all_schemas["part"] = [ + ("P_PARTKEY", pyarrow.int64()), + ("P_NAME", pyarrow.string()), + ("P_MFGR", pyarrow.string()), + 
("P_BRAND", pyarrow.string()), + ("P_TYPE", pyarrow.string()), + ("P_SIZE", pyarrow.int32()), + ("P_CONTAINER", pyarrow.string()), + ("P_RETAILPRICE", pyarrow.decimal128(11, 2)), + ("P_COMMENT", pyarrow.string()), +] + +all_schemas["partsupp"] = [ + ("PS_PARTKEY", pyarrow.int64()), + ("PS_SUPPKEY", pyarrow.int64()), + ("PS_AVAILQTY", pyarrow.int32()), + ("PS_SUPPLYCOST", pyarrow.decimal128(11, 2)), + ("PS_COMMENT", pyarrow.string()), +] + +all_schemas["region"] = [ + ("R_REGIONKEY", pyarrow.int64()), + ("R_NAME", pyarrow.string()), + ("R_COMMENT", pyarrow.string()), +] + +all_schemas["supplier"] = [ + ("S_SUPPKEY", pyarrow.int64()), + ("S_NAME", pyarrow.string()), + ("S_ADDRESS", pyarrow.string()), + ("S_NATIONKEY", pyarrow.int64()), + ("S_PHONE", pyarrow.string()), + ("S_ACCTBAL", pyarrow.decimal128(11, 2)), + ("S_COMMENT", pyarrow.string()), +] + + +def run(cmd: str): + print(f"Executing: {cmd}") + subprocess.run(cmd, shell=True, check=True) + + +def run_and_log_output(cmd: str, log_file: str): + print(f"Executing: {cmd}; writing output to {log_file}") + with open(log_file, "w") as file: + subprocess.run( + cmd, shell=True, check=True, stdout=file, stderr=subprocess.STDOUT + ) + + +def convert_tbl_to_parquet( + ctx: SessionContext, + table: str, + tbl_filename: str, + file_extension: str, + parquet_filename: str, +): + print(f"Converting {tbl_filename} to {parquet_filename} ...") + + # schema manipulation code copied from DataFusion Python tpch example + table_schema = [ + pyarrow.field(r[0].lower(), r[1], nullable=False) for r in all_schemas[table] + ] + + # Pre-collect the output columns so we can ignore the null field we add + # in to handle the trailing | in the file + output_cols = [r.name for r in table_schema] + + # Trailing | requires extra field for in processing + table_schema.append(pyarrow.field("some_null", pyarrow.null(), nullable=True)) + + schema = pyarrow.schema(table_schema) + + df = ctx.read_csv( + tbl_filename, + schema=schema, + 
has_header=False, + file_extension=file_extension, + delimiter="|", + ) + df = df.select_columns(*output_cols) + df.write_parquet(parquet_filename, compression="snappy") + + +def generate_tpch(scale_factor: int, partitions: int): + start_time = time.time() + docker_cmd = os.getenv("DOCKER_CMD", "docker") + if partitions == 1: + command = f"{docker_cmd} run -v `pwd`/data:/data -t --rm ghcr.io/scalytics/tpch-docker:main -vf -s {scale_factor}" + run_and_log_output(command, "/tmp/tpchgen.log") + else: + max_threads = os.cpu_count() + + # List of commands to run + commands = [ + ( + f"{docker_cmd} run -v `pwd`/data:/data -t --rm ghcr.io/scalytics/tpch-docker:main -vf -s {scale_factor} -C {partitions} -S {part}", + f"/tmp/tpchgen-part{part}.log", + ) + for part in range(1, partitions + 1) + ] + + # run commands in parallel + with concurrent.futures.ThreadPoolExecutor(max_workers=max_threads) as executor: + futures = [ + executor.submit(run_and_log_output, command, log_file) + for (command, log_file) in commands + ] + + # wait for all futures to complete + for future in concurrent.futures.as_completed(futures): + try: + future.result() + except Exception as e: + print(f"Command failed with exception: {e}") + + end_time = time.time() + print(f"Generated CSV data in {round(end_time - start_time, 2)} seconds") + + +def convert_tpch(partitions: int): + start_time = time.time() + ctx = SessionContext() + if partitions == 1: + # convert to parquet + for table in table_names: + convert_tbl_to_parquet( + ctx, table, f"data/{table}.tbl", "tbl", f"data/{table}.parquet" + ) + else: + for table in table_names: + run(f"mkdir -p data/{table}.parquet") + if table == "nation" or table == "region": + # nation and region are special cases and do not generate multiple files + convert_tbl_to_parquet( + ctx, + table, + f"data/{table}.tbl", + "tbl", + f"data/{table}.parquet/part1.parquet", + ) + else: + for part in range(1, partitions + 1): + convert_tbl_to_parquet( + ctx, + table, + 
f"data/{table}.tbl.{part}", + f"tbl.{part}", + f"data/{table}.parquet/part{part}.parquet", + ) + end_time = time.time() + print(f"Converted CSV to Parquet in {round(end_time - start_time, 2)} seconds") + + +if __name__ == "__main__": + arg_parser = argparse.ArgumentParser() + subparsers = arg_parser.add_subparsers(dest="command", help="Available commands") + + parser_generate = subparsers.add_parser("generate", help="Generate TPC-H CSV Data") + parser_generate.add_argument("--scale-factor", type=int, help="The scale factor") + parser_generate.add_argument( + "--partitions", type=int, help="The number of partitions" + ) + + parser_convert = subparsers.add_parser( + "convert", help="Convert TPC-H CSV Data to Parquet" + ) + parser_convert.add_argument( + "--partitions", type=int, help="The number of partitions" + ) + + args = arg_parser.parse_args() + if args.command == "generate": + generate_tpch(args.scale_factor, args.partitions) + elif args.command == "convert": + convert_tpch(args.partitions) From c750b4b001d535fffa293fefc3e68735628e83ea Mon Sep 17 00:00:00 2001 From: Edmondo Porcu Date: Thu, 3 Oct 2024 12:08:45 -0400 Subject: [PATCH 02/14] Using tpch script from datafusion-benchmarks --- README.md | 40 ++++++++++++++++++---- scripts/replace-expected-plans-paths.sh | 44 +++++++++++++++++++++++++ testdata/expected-plans/q1.txt | 4 +-- testdata/expected-plans/q10.txt | 16 ++++----- testdata/expected-plans/q11.txt | 24 +++++++------- testdata/expected-plans/q12.txt | 8 ++--- testdata/expected-plans/q13.txt | 8 ++--- testdata/expected-plans/q14.txt | 8 ++--- testdata/expected-plans/q16.txt | 12 +++---- testdata/expected-plans/q17.txt | 12 +++---- testdata/expected-plans/q18.txt | 16 ++++----- testdata/expected-plans/q19.txt | 8 ++--- testdata/expected-plans/q2.txt | 36 ++++++++++---------- testdata/expected-plans/q20.txt | 20 +++++------ testdata/expected-plans/q21.txt | 24 +++++++------- testdata/expected-plans/q22.txt | 12 +++---- testdata/expected-plans/q3.txt | 
12 +++---- testdata/expected-plans/q4.txt | 8 ++--- testdata/expected-plans/q5.txt | 24 +++++++------- testdata/expected-plans/q6.txt | 4 +-- testdata/expected-plans/q7.txt | 24 +++++++------- testdata/expected-plans/q8.txt | 32 +++++++++--------- testdata/expected-plans/q9.txt | 24 +++++++------- 23 files changed, 246 insertions(+), 174 deletions(-) create mode 100755 scripts/replace-expected-plans-paths.sh diff --git a/README.md b/README.md index 5aa86e0..dc3ba9e 100644 --- a/README.md +++ b/README.md @@ -19,8 +19,8 @@ # DataFusion on Ray -> This was originally a research project donated from [ray-sql](https://github.com/datafusion-contrib/ray-sql) to evaluate performing distributed SQL queries from Python, using -[Ray](https://www.ray.io/) and [DataFusion](https://github.com/apache/arrow-datafusion). +> This was originally a research project donated from [ray-sql](https://github.com/datafusion-contrib/ray-sql) to evaluate performing distributed SQL queries from Python, using +> [Ray](https://www.ray.io/) and [DataFusion](https://github.com/apache/arrow-datafusion). DataFusion Ray is a distributed SQL query engine powered by the Rust implementation of [Apache Arrow](https://arrow.apache.org/), [Apache DataFusion](https://datafusion.apache.org/) and [Ray](https://www.ray.io/). @@ -33,7 +33,7 @@ DataFusion Ray is a distributed SQL query engine powered by the Rust implementat ## Non Goals -- Re-build the cluster scheduling systems like what [Ballista](https://datafusion.apache.org/ballista/) did. +- Re-build the cluster scheduling systems like what [Ballista](https://datafusion.apache.org/ballista/) did. - Ballista is extremely complex and utilizing Ray feels like it abstracts some of that complexity away. - Datafusion Ray is delegating cluster management to Ray. 
@@ -120,10 +120,38 @@ python -m pip install -r requirements-in.txt Whenever rust code changes (your changes or via `git pull`): -```bash +````bash # make sure you activate the venv using "source venv/bin/activate" first -maturin develop -python -m pytest +maturin develop python -m pytest ``` + + +## Testing + +Running local Rust tests require generating the tpch-data. This can be done +by running the following command: + +```bash ./scripts/generate_tpch_data.sh ``` + +Tests compare plans with expected plans, which unfortunately contain the +path to the parquet tables. The path committed under version control is +the one of a Github Runner, and won't work locally. You can fix it by +running the following command: + +```bash + +./scripts/replace-expected-plan-paths.sh local-dev + +```` + +When instead you need to regenerate the plans, which you can do by +re-running the planner tests removing all the content of +`testdata/expected-plans`, they will now contain your local paths. You can +fix it before committing the plans running + +```bash + +./scripts/replace-expected-plan-paths.sh pre-ci + ``` ## Benchmarking diff --git a/scripts/replace-expected-plans-paths.sh b/scripts/replace-expected-plans-paths.sh new file mode 100755 index 0000000..527fd97 --- /dev/null +++ b/scripts/replace-expected-plans-paths.sh @@ -0,0 +1,44 @@ +#!/bin/bash + +# This script helps change the path to parquet files in expected plans for +# local development and CI + +set -e + +if [ "$#" -ne 1 ]; then + echo "Usage: $0 " + echo "Modes: pre-ci, local-dev" + exit 1 +fi + +# Assign the parameter to the mode variable +mode=$1 + +ci_dir="home/runner/work/datafusion-ray/datafusion-ray" +current_dir=$(pwd) +current_dir_no_leading_slash="${current_dir#/}" +expected_plans_dir="./testdata/expected-plans" + +# Function to replace paths in files +replace_paths() { + local search=$1 + local replace=$2 + find "$expected_plans_dir" -type f -exec sed -i "s|$search|$replace|g" {} + + echo "Replaced all 
occurrences of '$search' with '$replace' in files within '$expected_plans_dir'." +} + +# Handle the modes +case $mode in +pre-ci) + replace_paths "$current_dir_no_leading_slash" "$ci_dir" + ;; +local-dev) + replace_paths "$ci_dir" "$current_dir_no_leading_slash" + ;; +*) + echo "Invalid mode: $mode" + echo "Usage: $0 " + echo "Modes: pre-ci, local-dev" + exit 1 + ;; +esac diff --git a/testdata/expected-plans/q1.txt b/testdata/expected-plans/q1.txt index 73cce46..111c15f 100644 --- a/testdata/expected-plans/q1.txt +++ b/testdata/expected-plans/q1.txt @@ -21,7 +21,7 @@ SortPreservingMergeExec: [l_returnflag@0 ASC NULLS LAST,l_linestatus@1 ASC NULLS ProjectionExec: expr=[l_extendedprice@1 * (Some(1),20,0 - l_discount@2) as __common_expr_2, l_quantity@0 as l_quantity, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, l_tax@3 as l_tax, l_returnflag@4 as l_returnflag, l_linestatus@5 as l_linestatus] CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_shipdate@6 <= 1998-09-24 - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate], predicate=l_shipdate@10 <= 1998-09-24, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@0 <= 1998-09-24 END, required_guarantees=[] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], 
[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate], predicate=l_shipdate@10 <= 1998-09-24, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@0 <= 1998-09-24 END, required_guarantees=[] RaySQL Plan =========== @@ -32,7 +32,7 @@ ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "l_return ProjectionExec: expr=[l_extendedprice@1 * (Some(1),20,0 - l_discount@2) as __common_expr_2, l_quantity@0 as l_quantity, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, l_tax@3 as l_tax, l_returnflag@4 as l_returnflag, l_linestatus@5 as l_linestatus] CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_shipdate@6 <= 1998-09-24 - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate], predicate=l_shipdate@10 <= 1998-09-24, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@0 <= 1998-09-24 END, required_guarantees=[] + ParquetExec: 
file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate], predicate=l_shipdate@10 <= 1998-09-24, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@0 <= 1998-09-24 END, required_guarantees=[] Query Stage #1 (4 -> 4): ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "l_returnflag", index: 0 }, Column { name: "l_linestatus", index: 1 }], 4)) diff --git a/testdata/expected-plans/q10.txt b/testdata/expected-plans/q10.txt index 607f1d7..c87cc6d 100644 --- a/testdata/expected-plans/q10.txt +++ b/testdata/expected-plans/q10.txt @@ -36,7 +36,7 @@ GlobalLimitExec: skip=0, fetch=20 HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, c_nationkey@3)], projection=[n_name@1, c_custkey@2, c_name@3, c_address@4, c_phone@6, c_acctbal@7, c_comment@8, l_extendedprice@9, l_discount@10] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=1 - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([c_nationkey@3], 4), input_partitions=4 
CoalesceBatchesExec: target_batch_size=8192 @@ -47,37 +47,37 @@ GlobalLimitExec: skip=0, fetch=20 HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, c_name@1, c_address@2, c_nationkey@3, c_phone@4, c_acctbal@5, c_comment@6, o_orderkey@7] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([c_custkey@0], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:0..3391370], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:3391370..6782740], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:6782740..6784675, home/edmondo/Development/datafusion-ray/data/customer.parquet/part2.parquet:0..3389435], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part2.parquet:3389435..6780803]]}, projection=[c_custkey, c_name, c_address, c_nationkey, c_phone, c_acctbal, c_comment] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..3391370], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:3391370..6782740], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6782740..6784675, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..3389435], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:3389435..6780803]]}, projection=[c_custkey, c_name, c_address, c_nationkey, c_phone, c_acctbal, c_comment] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([o_custkey@1], 4), input_partitions=4 ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: o_orderdate@2 >= 1993-07-01 AND o_orderdate@2 < 1993-10-01 - ParquetExec: file_groups={4 groups: 
[[home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1993-07-01 AND o_orderdate@4 < 1993-10-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1993-07-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1993-10-01 END, required_guarantees=[] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1993-07-01 AND o_orderdate@4 < 1993-10-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1993-07-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1993-10-01 END, required_guarantees=[] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4 ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_extendedprice@1 as l_extendedprice, 
l_discount@2 as l_discount] CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_returnflag@3 = R - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_extendedprice, l_discount, l_returnflag], predicate=l_returnflag@8 = R, pruning_predicate=CASE WHEN l_returnflag_null_count@2 = l_returnflag_row_count@3 THEN false ELSE l_returnflag_min@0 <= R AND R <= l_returnflag_max@1 END, required_guarantees=[l_returnflag in (R)] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_extendedprice, l_discount, l_returnflag], predicate=l_returnflag@8 = R, pruning_predicate=CASE WHEN l_returnflag_null_count@2 = l_returnflag_row_count@3 THEN false ELSE l_returnflag_min@0 <= R AND R <= l_returnflag_max@1 END, required_guarantees=[l_returnflag in (R)] RaySQL Plan =========== Query Stage #0 (1 -> 4): ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) - ParquetExec: file_groups={1 group: 
[[home/edmondo/Development/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name] Query Stage #1 (4 -> 4): ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:0..3391370], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:3391370..6782740], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:6782740..6784675, home/edmondo/Development/datafusion-ray/data/customer.parquet/part2.parquet:0..3389435], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part2.parquet:3389435..6780803]]}, projection=[c_custkey, c_name, c_address, c_nationkey, c_phone, c_acctbal, c_comment] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..3391370], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:3391370..6782740], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6782740..6784675, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..3389435], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:3389435..6780803]]}, projection=[c_custkey, c_name, c_address, c_nationkey, c_phone, c_acctbal, c_comment] Query Stage #2 (4 -> 4): ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "o_custkey", index: 1 }], 4)) ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: o_orderdate@2 >= 1993-07-01 AND o_orderdate@2 < 1993-10-01 - ParquetExec: file_groups={4 groups: 
[[home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1993-07-01 AND o_orderdate@4 < 1993-10-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1993-07-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1993-10-01 END, required_guarantees=[] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1993-07-01 AND o_orderdate@4 < 1993-10-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1993-07-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1993-10-01 END, required_guarantees=[] Query Stage #3 (4 -> 4): ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "o_orderkey", index: 7 }], 4)) @@ -93,7 +93,7 @@ ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { 
name: "l_orderk ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_returnflag@3 = R - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_extendedprice, l_discount, l_returnflag], predicate=l_returnflag@8 = R, pruning_predicate=CASE WHEN l_returnflag_null_count@2 = l_returnflag_row_count@3 THEN false ELSE l_returnflag_min@0 <= R AND R <= l_returnflag_max@1 END, required_guarantees=[l_returnflag in (R)] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_extendedprice, l_discount, l_returnflag], predicate=l_returnflag@8 = R, pruning_predicate=CASE WHEN l_returnflag_null_count@2 = l_returnflag_row_count@3 THEN false ELSE l_returnflag_min@0 <= R AND R <= l_returnflag_max@1 END, required_guarantees=[l_returnflag in (R)] Query Stage #5 (4 -> 4): ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "c_nationkey", index: 3 }], 4)) diff --git 
a/testdata/expected-plans/q11.txt b/testdata/expected-plans/q11.txt index ff6c98f..84a0680 100644 --- a/testdata/expected-plans/q11.txt +++ b/testdata/expected-plans/q11.txt @@ -46,7 +46,7 @@ SortPreservingMergeExec: [value@1 DESC] ProjectionExec: expr=[n_nationkey@0 as n_nationkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: n_name@1 = ALGERIA - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ALGERIA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ALGERIA AND ALGERIA <= n_name_max@1 END, required_guarantees=[n_name in (ALGERIA)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ALGERIA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ALGERIA AND ALGERIA <= n_name_max@1 END, required_guarantees=[n_name in (ALGERIA)] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([s_nationkey@2], 4), input_partitions=4 ProjectionExec: expr=[ps_availqty@1 as ps_availqty, ps_supplycost@2 as ps_supplycost, s_nationkey@0 as s_nationkey] @@ -55,10 +55,10 @@ SortPreservingMergeExec: [value@1 DESC] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=4 RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 - ParquetExec: file_groups={2 groups: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet/part1.parquet], [home/edmondo/Development/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], 
[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([ps_suppkey@0], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10449977], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part1.parquet:10449977..20896603, home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:0..3351], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:3351..10453328], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:10453328..20903305]]}, projection=[ps_suppkey, ps_availqty, ps_supplycost] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10449977], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:10449977..20896603, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..3351], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:3351..10453328], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:10453328..20903305]]}, projection=[ps_suppkey, ps_availqty, ps_supplycost] AggregateExec: mode=FinalPartitioned, gby=[ps_partkey@0 as ps_partkey], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([ps_partkey@0], 4), input_partitions=4 @@ -71,7 +71,7 @@ SortPreservingMergeExec: [value@1 DESC] ProjectionExec: expr=[n_nationkey@0 as n_nationkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: n_name@1 = ALGERIA - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = 
ALGERIA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ALGERIA AND ALGERIA <= n_name_max@1 END, required_guarantees=[n_name in (ALGERIA)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ALGERIA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ALGERIA AND ALGERIA <= n_name_max@1 END, required_guarantees=[n_name in (ALGERIA)] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([s_nationkey@3], 4), input_partitions=4 ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_availqty@2 as ps_availqty, ps_supplycost@3 as ps_supplycost, s_nationkey@0 as s_nationkey] @@ -80,10 +80,10 @@ SortPreservingMergeExec: [value@1 DESC] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=4 RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 - ParquetExec: file_groups={2 groups: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet/part1.parquet], [home/edmondo/Development/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([ps_suppkey@1], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10449977], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part1.parquet:10449977..20896603, home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:0..3351], 
[home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:3351..10453328], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:10453328..20903305]]}, projection=[ps_partkey, ps_suppkey, ps_availqty, ps_supplycost] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10449977], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:10449977..20896603, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..3351], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:3351..10453328], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:10453328..20903305]]}, projection=[ps_partkey, ps_suppkey, ps_availqty, ps_supplycost] RaySQL Plan =========== @@ -93,15 +93,15 @@ ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "n_nation ProjectionExec: expr=[n_nationkey@0 as n_nationkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: n_name@1 = ALGERIA - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ALGERIA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ALGERIA AND ALGERIA <= n_name_max@1 END, required_guarantees=[n_name in (ALGERIA)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ALGERIA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ALGERIA AND ALGERIA <= n_name_max@1 END, required_guarantees=[n_name in (ALGERIA)] Query Stage #1 (2 -> 4): ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) - ParquetExec: 
file_groups={2 groups: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet/part1.parquet], [home/edmondo/Development/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] Query Stage #2 (4 -> 4): ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "ps_suppkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10449977], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part1.parquet:10449977..20896603, home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:0..3351], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:3351..10453328], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:10453328..20903305]]}, projection=[ps_suppkey, ps_availqty, ps_supplycost] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10449977], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:10449977..20896603, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..3351], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:3351..10453328], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:10453328..20903305]]}, projection=[ps_suppkey, ps_availqty, ps_supplycost] Query Stage #3 (4 -> 4): ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "s_nationkey", index: 2 }], 4)) @@ -128,15 +128,15 @@ ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "n_nation ProjectionExec: 
expr=[n_nationkey@0 as n_nationkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: n_name@1 = ALGERIA - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ALGERIA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ALGERIA AND ALGERIA <= n_name_max@1 END, required_guarantees=[n_name in (ALGERIA)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ALGERIA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ALGERIA AND ALGERIA <= n_name_max@1 END, required_guarantees=[n_name in (ALGERIA)] Query Stage #6 (2 -> 4): ShuffleWriterExec(stage_id=6, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) - ParquetExec: file_groups={2 groups: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet/part1.parquet], [home/edmondo/Development/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] Query Stage #7 (4 -> 4): ShuffleWriterExec(stage_id=7, output_partitioning=Hash([Column { name: "ps_suppkey", index: 1 }], 4)) - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10449977], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part1.parquet:10449977..20896603, home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:0..3351], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:3351..10453328], 
[home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:10453328..20903305]]}, projection=[ps_partkey, ps_suppkey, ps_availqty, ps_supplycost] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10449977], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:10449977..20896603, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..3351], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:3351..10453328], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:10453328..20903305]]}, projection=[ps_partkey, ps_suppkey, ps_availqty, ps_supplycost] Query Stage #8 (4 -> 4): ShuffleWriterExec(stage_id=8, output_partitioning=Hash([Column { name: "s_nationkey", index: 3 }], 4)) diff --git a/testdata/expected-plans/q12.txt b/testdata/expected-plans/q12.txt index d2f81fb..43f07ed 100644 --- a/testdata/expected-plans/q12.txt +++ b/testdata/expected-plans/q12.txt @@ -29,10 +29,10 @@ SortPreservingMergeExec: [l_shipmode@0 ASC NULLS LAST] ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_shipmode@4 as l_shipmode] CoalesceBatchesExec: target_batch_size=8192 FilterExec: (l_shipmode@4 = FOB OR l_shipmode@4 = SHIP) AND l_receiptdate@3 > l_commitdate@2 AND l_shipdate@1 < l_commitdate@2 AND l_receiptdate@3 >= 1995-01-01 AND l_receiptdate@3 < 1996-01-01 - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, 
projection=[l_orderkey, l_shipdate, l_commitdate, l_receiptdate, l_shipmode], predicate=(l_shipmode@14 = FOB OR l_shipmode@14 = SHIP) AND l_receiptdate@12 > l_commitdate@11 AND l_shipdate@10 < l_commitdate@11 AND l_receiptdate@12 >= 1995-01-01 AND l_receiptdate@12 < 1996-01-01, pruning_predicate=(CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= FOB AND FOB <= l_shipmode_max@1 END OR CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= SHIP AND SHIP <= l_shipmode_max@1 END) AND CASE WHEN l_receiptdate_null_count@5 = l_receiptdate_row_count@6 THEN false ELSE l_receiptdate_max@4 >= 1995-01-01 END AND CASE WHEN l_receiptdate_null_count@5 = l_receiptdate_row_count@6 THEN false ELSE l_receiptdate_min@7 < 1996-01-01 END, required_guarantees=[l_shipmode in (SHIP, FOB)] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_shipdate, l_commitdate, l_receiptdate, l_shipmode], predicate=(l_shipmode@14 = FOB OR l_shipmode@14 = SHIP) AND l_receiptdate@12 > l_commitdate@11 AND l_shipdate@10 < l_commitdate@11 AND l_receiptdate@12 >= 1995-01-01 AND l_receiptdate@12 < 1996-01-01, pruning_predicate=(CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= FOB AND FOB <= l_shipmode_max@1 END OR CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= SHIP AND SHIP <= l_shipmode_max@1 END) AND CASE WHEN 
l_receiptdate_null_count@5 = l_receiptdate_row_count@6 THEN false ELSE l_receiptdate_max@4 >= 1995-01-01 END AND CASE WHEN l_receiptdate_null_count@5 = l_receiptdate_row_count@6 THEN false ELSE l_receiptdate_min@7 < 1996-01-01 END, required_guarantees=[l_shipmode in (SHIP, FOB)] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([o_orderkey@0], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_orderpriority] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_orderpriority] RaySQL Plan =========== @@ -42,11 +42,11 @@ ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "l_orderk ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_shipmode@4 as l_shipmode] CoalesceBatchesExec: target_batch_size=8192 FilterExec: (l_shipmode@4 = FOB OR l_shipmode@4 = SHIP) AND l_receiptdate@3 > l_commitdate@2 AND l_shipdate@1 < l_commitdate@2 AND l_receiptdate@3 >= 1995-01-01 AND l_receiptdate@3 < 1996-01-01 - ParquetExec: file_groups={4 
groups: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_shipdate, l_commitdate, l_receiptdate, l_shipmode], predicate=(l_shipmode@14 = FOB OR l_shipmode@14 = SHIP) AND l_receiptdate@12 > l_commitdate@11 AND l_shipdate@10 < l_commitdate@11 AND l_receiptdate@12 >= 1995-01-01 AND l_receiptdate@12 < 1996-01-01, pruning_predicate=(CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= FOB AND FOB <= l_shipmode_max@1 END OR CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= SHIP AND SHIP <= l_shipmode_max@1 END) AND CASE WHEN l_receiptdate_null_count@5 = l_receiptdate_row_count@6 THEN false ELSE l_receiptdate_max@4 >= 1995-01-01 END AND CASE WHEN l_receiptdate_null_count@5 = l_receiptdate_row_count@6 THEN false ELSE l_receiptdate_min@7 < 1996-01-01 END, required_guarantees=[l_shipmode in (SHIP, FOB)] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_shipdate, l_commitdate, l_receiptdate, l_shipmode], predicate=(l_shipmode@14 = FOB OR 
l_shipmode@14 = SHIP) AND l_receiptdate@12 > l_commitdate@11 AND l_shipdate@10 < l_commitdate@11 AND l_receiptdate@12 >= 1995-01-01 AND l_receiptdate@12 < 1996-01-01, pruning_predicate=(CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= FOB AND FOB <= l_shipmode_max@1 END OR CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= SHIP AND SHIP <= l_shipmode_max@1 END) AND CASE WHEN l_receiptdate_null_count@5 = l_receiptdate_row_count@6 THEN false ELSE l_receiptdate_max@4 >= 1995-01-01 END AND CASE WHEN l_receiptdate_null_count@5 = l_receiptdate_row_count@6 THEN false ELSE l_receiptdate_min@7 < 1996-01-01 END, required_guarantees=[l_shipmode in (SHIP, FOB)] Query Stage #1 (4 -> 4): ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_orderpriority] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, 
o_orderpriority] Query Stage #2 (4 -> 4): ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "l_shipmode", index: 0 }], 4)) diff --git a/testdata/expected-plans/q13.txt b/testdata/expected-plans/q13.txt index aaaea0f..cb2911a 100644 --- a/testdata/expected-plans/q13.txt +++ b/testdata/expected-plans/q13.txt @@ -30,27 +30,27 @@ SortPreservingMergeExec: [custdist@1 DESC,c_count@0 DESC] HashJoinExec: mode=Partitioned, join_type=Left, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, o_orderkey@1] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([c_custkey@0], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:0..3391370], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:3391370..6782740], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:6782740..6784675, home/edmondo/Development/datafusion-ray/data/customer.parquet/part2.parquet:0..3389435], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part2.parquet:3389435..6780803]]}, projection=[c_custkey] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..3391370], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:3391370..6782740], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6782740..6784675, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..3389435], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:3389435..6780803]]}, projection=[c_custkey] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([o_custkey@1], 4), input_partitions=4 ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: o_comment@2 
NOT LIKE %express%requests% - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_custkey, o_comment], predicate=o_comment@8 NOT LIKE %express%requests% + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_custkey, o_comment], predicate=o_comment@8 NOT LIKE %express%requests% RaySQL Plan =========== Query Stage #0 (4 -> 4): ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:0..3391370], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:3391370..6782740], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:6782740..6784675, home/edmondo/Development/datafusion-ray/data/customer.parquet/part2.parquet:0..3389435], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part2.parquet:3389435..6780803]]}, projection=[c_custkey] + ParquetExec: file_groups={4 groups: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..3391370], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:3391370..6782740], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6782740..6784675, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..3389435], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:3389435..6780803]]}, projection=[c_custkey] Query Stage #1 (4 -> 4): ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "o_custkey", index: 1 }], 4)) ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: o_comment@2 NOT LIKE %express%requests% - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_custkey, o_comment], predicate=o_comment@8 NOT LIKE %express%requests% + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, 
o_custkey, o_comment], predicate=o_comment@8 NOT LIKE %express%requests% Query Stage #2 (4 -> 4): ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "c_count", index: 0 }], 4)) diff --git a/testdata/expected-plans/q14.txt b/testdata/expected-plans/q14.txt index aa81525..fa366f3 100644 --- a/testdata/expected-plans/q14.txt +++ b/testdata/expected-plans/q14.txt @@ -23,27 +23,27 @@ ProjectionExec: expr=[100 * CAST(sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") T CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([p_partkey@0], 4), input_partitions=4 RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 - ParquetExec: file_groups={2 groups: [[home/edmondo/Development/datafusion-ray/data/part.parquet/part1.parquet], [home/edmondo/Development/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_type] + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_type] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([l_partkey@0], 4), input_partitions=4 ProjectionExec: expr=[l_partkey@0 as l_partkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_shipdate@3 >= 1995-02-01 AND l_shipdate@3 < 1995-03-01 - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, 
projection=[l_partkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1995-02-01 AND l_shipdate@10 < 1995-03-01, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1995-02-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1995-03-01 END, required_guarantees=[] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_partkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1995-02-01 AND l_shipdate@10 < 1995-03-01, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1995-02-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1995-03-01 END, required_guarantees=[] RaySQL Plan =========== Query Stage #0 (2 -> 4): ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) - ParquetExec: file_groups={2 groups: [[home/edmondo/Development/datafusion-ray/data/part.parquet/part1.parquet], [home/edmondo/Development/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_type] + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_type] Query Stage #1 (4 -> 4): 
ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "l_partkey", index: 0 }], 4)) ProjectionExec: expr=[l_partkey@0 as l_partkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_shipdate@3 >= 1995-02-01 AND l_shipdate@3 < 1995-03-01 - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_partkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1995-02-01 AND l_shipdate@10 < 1995-03-01, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1995-02-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1995-03-01 END, required_guarantees=[] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_partkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1995-02-01 AND l_shipdate@10 < 1995-03-01, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 
THEN false ELSE l_shipdate_max@0 >= 1995-02-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1995-03-01 END, required_guarantees=[] Query Stage #2 (4 -> 1): ShuffleWriterExec(stage_id=2, output_partitioning=Hash([], 4)) diff --git a/testdata/expected-plans/q16.txt b/testdata/expected-plans/q16.txt index 4c4b603..b72c9f1 100644 --- a/testdata/expected-plans/q16.txt +++ b/testdata/expected-plans/q16.txt @@ -38,7 +38,7 @@ SortPreservingMergeExec: [supplier_cnt@3 DESC,p_brand@0 ASC NULLS LAST,p_type@1 CoalesceBatchesExec: target_batch_size=8192 FilterExec: s_comment@1 LIKE %Customer%Complaints% RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 - ParquetExec: file_groups={2 groups: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet/part1.parquet], [home/edmondo/Development/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_comment], predicate=s_comment@6 LIKE %Customer%Complaints% + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_comment], predicate=s_comment@6 LIKE %Customer%Complaints% CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([ps_suppkey@0], 4), input_partitions=4 ProjectionExec: expr=[ps_suppkey@3 as ps_suppkey, p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size] @@ -49,10 +49,10 @@ SortPreservingMergeExec: [supplier_cnt@3 DESC,p_brand@0 ASC NULLS LAST,p_type@1 CoalesceBatchesExec: target_batch_size=8192 FilterExec: p_brand@1 != Brand#14 AND p_type@2 NOT LIKE SMALL PLATED% AND Use p_size@3 IN (SET) ([Literal { value: Int32(14) }, Literal { value: Int32(6) }, Literal { value: Int32(5) }, Literal { value: Int32(31) }, Literal { value: Int32(49) }, Literal { value: Int32(15) }, Literal { value: Int32(41) }, Literal { value: 
Int32(47) }]) RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 - ParquetExec: file_groups={2 groups: [[home/edmondo/Development/datafusion-ray/data/part.parquet/part1.parquet], [home/edmondo/Development/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_brand, p_type, p_size], predicate=p_brand@3 != Brand#14 AND p_type@4 NOT LIKE SMALL PLATED% AND Use p_size@5 IN (SET) ([Literal { value: Int32(14) }, Literal { value: Int32(6) }, Literal { value: Int32(5) }, Literal { value: Int32(31) }, Literal { value: Int32(49) }, Literal { value: Int32(15) }, Literal { value: Int32(41) }, Literal { value: Int32(47) }]), pruning_predicate=CASE WHEN p_brand_null_count@2 = p_brand_row_count@3 THEN false ELSE p_brand_min@0 != Brand#14 OR Brand#14 != p_brand_max@1 END AND (CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 14 AND 14 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 6 AND 6 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 5 AND 5 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 31 AND 31 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 49 AND 49 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 15 AND 15 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 41 AND 41 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 47 AND 47 <= p_size_max@5 END), required_guarantees=[p_brand not in (Brand#14), p_size in (41, 14, 49, 6, 31, 15, 5, 47)] + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet], 
[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_brand, p_type, p_size], predicate=p_brand@3 != Brand#14 AND p_type@4 NOT LIKE SMALL PLATED% AND Use p_size@5 IN (SET) ([Literal { value: Int32(14) }, Literal { value: Int32(6) }, Literal { value: Int32(5) }, Literal { value: Int32(31) }, Literal { value: Int32(49) }, Literal { value: Int32(15) }, Literal { value: Int32(41) }, Literal { value: Int32(47) }]), pruning_predicate=CASE WHEN p_brand_null_count@2 = p_brand_row_count@3 THEN false ELSE p_brand_min@0 != Brand#14 OR Brand#14 != p_brand_max@1 END AND (CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 14 AND 14 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 6 AND 6 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 5 AND 5 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 31 AND 31 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 49 AND 49 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 15 AND 15 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 41 AND 41 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 47 AND 47 <= p_size_max@5 END), required_guarantees=[p_brand not in (Brand#14), p_size in (41, 14, 49, 6, 31, 15, 5, 47)] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([ps_partkey@0], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10449977], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part1.parquet:10449977..20896603, 
home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:0..3351], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:3351..10453328], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:10453328..20903305]]}, projection=[ps_partkey, ps_suppkey] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10449977], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:10449977..20896603, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..3351], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:3351..10453328], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:10453328..20903305]]}, projection=[ps_partkey, ps_suppkey] RaySQL Plan =========== @@ -62,17 +62,17 @@ ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "s_suppke ProjectionExec: expr=[s_suppkey@0 as s_suppkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: s_comment@1 LIKE %Customer%Complaints% - ParquetExec: file_groups={2 groups: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet/part1.parquet], [home/edmondo/Development/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_comment], predicate=s_comment@6 LIKE %Customer%Complaints% + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_comment], predicate=s_comment@6 LIKE %Customer%Complaints% Query Stage #1 (2 -> 4): ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) CoalesceBatchesExec: target_batch_size=8192 FilterExec: p_brand@1 != Brand#14 AND p_type@2 NOT LIKE SMALL PLATED% AND Use p_size@3 
IN (SET) ([Literal { value: Int32(14) }, Literal { value: Int32(6) }, Literal { value: Int32(5) }, Literal { value: Int32(31) }, Literal { value: Int32(49) }, Literal { value: Int32(15) }, Literal { value: Int32(41) }, Literal { value: Int32(47) }]) - ParquetExec: file_groups={2 groups: [[home/edmondo/Development/datafusion-ray/data/part.parquet/part1.parquet], [home/edmondo/Development/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_brand, p_type, p_size], predicate=p_brand@3 != Brand#14 AND p_type@4 NOT LIKE SMALL PLATED% AND Use p_size@5 IN (SET) ([Literal { value: Int32(14) }, Literal { value: Int32(6) }, Literal { value: Int32(5) }, Literal { value: Int32(31) }, Literal { value: Int32(49) }, Literal { value: Int32(15) }, Literal { value: Int32(41) }, Literal { value: Int32(47) }]), pruning_predicate=CASE WHEN p_brand_null_count@2 = p_brand_row_count@3 THEN false ELSE p_brand_min@0 != Brand#14 OR Brand#14 != p_brand_max@1 END AND (CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 14 AND 14 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 6 AND 6 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 5 AND 5 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 31 AND 31 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 49 AND 49 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 15 AND 15 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 41 AND 41 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 47 AND 47 <= p_size_max@5 END), required_guarantees=[p_brand not in (Brand#14), p_size in (41, 14, 49, 6, 31, 15, 5, 47)] + 
ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_brand, p_type, p_size], predicate=p_brand@3 != Brand#14 AND p_type@4 NOT LIKE SMALL PLATED% AND Use p_size@5 IN (SET) ([Literal { value: Int32(14) }, Literal { value: Int32(6) }, Literal { value: Int32(5) }, Literal { value: Int32(31) }, Literal { value: Int32(49) }, Literal { value: Int32(15) }, Literal { value: Int32(41) }, Literal { value: Int32(47) }]), pruning_predicate=CASE WHEN p_brand_null_count@2 = p_brand_row_count@3 THEN false ELSE p_brand_min@0 != Brand#14 OR Brand#14 != p_brand_max@1 END AND (CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 14 AND 14 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 6 AND 6 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 5 AND 5 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 31 AND 31 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 49 AND 49 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 15 AND 15 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 41 AND 41 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 47 AND 47 <= p_size_max@5 END), required_guarantees=[p_brand not in (Brand#14), p_size in (41, 14, 49, 6, 31, 15, 5, 47)] Query Stage #2 (4 -> 4): ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: 
[[home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10449977], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part1.parquet:10449977..20896603, home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:0..3351], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:3351..10453328], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:10453328..20903305]]}, projection=[ps_partkey, ps_suppkey] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10449977], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:10449977..20896603, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..3351], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:3351..10453328], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:10453328..20903305]]}, projection=[ps_partkey, ps_suppkey] Query Stage #3 (4 -> 4): ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "ps_suppkey", index: 0 }], 4)) diff --git a/testdata/expected-plans/q17.txt b/testdata/expected-plans/q17.txt index 9c52300..60c9796 100644 --- a/testdata/expected-plans/q17.txt +++ b/testdata/expected-plans/q17.txt @@ -34,16 +34,16 @@ ProjectionExec: expr=[CAST(sum(lineitem.l_extendedprice)@0 AS Float64) / 7 as av CoalesceBatchesExec: target_batch_size=8192 FilterExec: p_brand@1 = Brand#42 AND p_container@2 = LG BAG RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 - ParquetExec: file_groups={2 groups: [[home/edmondo/Development/datafusion-ray/data/part.parquet/part1.parquet], [home/edmondo/Development/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_brand, p_container], predicate=p_brand@3 = Brand#42 AND p_container@6 = LG BAG, pruning_predicate=CASE WHEN 
p_brand_null_count@2 = p_brand_row_count@3 THEN false ELSE p_brand_min@0 <= Brand#42 AND Brand#42 <= p_brand_max@1 END AND CASE WHEN p_container_null_count@6 = p_container_row_count@7 THEN false ELSE p_container_min@4 <= LG BAG AND LG BAG <= p_container_max@5 END, required_guarantees=[p_brand in (Brand#42), p_container in (LG BAG)] + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_brand, p_container], predicate=p_brand@3 = Brand#42 AND p_container@6 = LG BAG, pruning_predicate=CASE WHEN p_brand_null_count@2 = p_brand_row_count@3 THEN false ELSE p_brand_min@0 <= Brand#42 AND Brand#42 <= p_brand_max@1 END AND CASE WHEN p_container_null_count@6 = p_container_row_count@7 THEN false ELSE p_container_min@4 <= LG BAG AND LG BAG <= p_container_max@5 END, required_guarantees=[p_brand in (Brand#42), p_container in (LG BAG)] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([l_partkey@0], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_partkey, l_quantity, l_extendedprice] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, 
home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_partkey, l_quantity, l_extendedprice] ProjectionExec: expr=[CAST(0.2 * CAST(avg(lineitem.l_quantity)@1 AS Float64) AS Decimal128(30, 15)) as Float64(0.2) * avg(lineitem.l_quantity), l_partkey@0 as l_partkey] AggregateExec: mode=FinalPartitioned, gby=[l_partkey@0 as l_partkey], aggr=[avg(lineitem.l_quantity)] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([l_partkey@0], 4), input_partitions=4 AggregateExec: mode=Partial, gby=[l_partkey@0 as l_partkey], aggr=[avg(lineitem.l_quantity)] - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_partkey, l_quantity] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_partkey, l_quantity] RaySQL Plan =========== @@ -53,16 
+53,16 @@ ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "p_partke ProjectionExec: expr=[p_partkey@0 as p_partkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: p_brand@1 = Brand#42 AND p_container@2 = LG BAG - ParquetExec: file_groups={2 groups: [[home/edmondo/Development/datafusion-ray/data/part.parquet/part1.parquet], [home/edmondo/Development/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_brand, p_container], predicate=p_brand@3 = Brand#42 AND p_container@6 = LG BAG, pruning_predicate=CASE WHEN p_brand_null_count@2 = p_brand_row_count@3 THEN false ELSE p_brand_min@0 <= Brand#42 AND Brand#42 <= p_brand_max@1 END AND CASE WHEN p_container_null_count@6 = p_container_row_count@7 THEN false ELSE p_container_min@4 <= LG BAG AND LG BAG <= p_container_max@5 END, required_guarantees=[p_brand in (Brand#42), p_container in (LG BAG)] + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_brand, p_container], predicate=p_brand@3 = Brand#42 AND p_container@6 = LG BAG, pruning_predicate=CASE WHEN p_brand_null_count@2 = p_brand_row_count@3 THEN false ELSE p_brand_min@0 <= Brand#42 AND Brand#42 <= p_brand_max@1 END AND CASE WHEN p_container_null_count@6 = p_container_row_count@7 THEN false ELSE p_container_min@4 <= LG BAG AND LG BAG <= p_container_max@5 END, required_guarantees=[p_brand in (Brand#42), p_container in (LG BAG)] Query Stage #1 (4 -> 4): ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "l_partkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, 
home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_partkey, l_quantity, l_extendedprice] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_partkey, l_quantity, l_extendedprice] Query Stage #2 (4 -> 4): ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "l_partkey", index: 0 }], 4)) AggregateExec: mode=Partial, gby=[l_partkey@0 as l_partkey], aggr=[avg(lineitem.l_quantity)] - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_partkey, l_quantity] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, 
home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_partkey, l_quantity] Query Stage #3 (4 -> 1): ShuffleWriterExec(stage_id=3, output_partitioning=Hash([], 4)) diff --git a/testdata/expected-plans/q18.txt b/testdata/expected-plans/q18.txt index 75e1d62..eac2add 100644 --- a/testdata/expected-plans/q18.txt +++ b/testdata/expected-plans/q18.txt @@ -37,7 +37,7 @@ GlobalLimitExec: skip=0, fetch=100 CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4 AggregateExec: mode=Partial, gby=[l_orderkey@0 as l_orderkey], aggr=[sum(lineitem.l_quantity)] - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_quantity] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, 
l_quantity] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@2, l_orderkey@0)], projection=[c_custkey@0, c_name@1, o_orderkey@2, o_totalprice@3, o_orderdate@4, l_quantity@6] CoalesceBatchesExec: target_batch_size=8192 @@ -46,13 +46,13 @@ GlobalLimitExec: skip=0, fetch=100 HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, c_name@1, o_orderkey@2, o_totalprice@4, o_orderdate@5] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([c_custkey@0], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:0..3391370], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:3391370..6782740], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:6782740..6784675, home/edmondo/Development/datafusion-ray/data/customer.parquet/part2.parquet:0..3389435], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part2.parquet:3389435..6780803]]}, projection=[c_custkey, c_name] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..3391370], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:3391370..6782740], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6782740..6784675, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..3389435], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:3389435..6780803]]}, projection=[c_custkey, c_name] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([o_custkey@1], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], 
[home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_custkey, o_totalprice, o_orderdate] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_custkey, o_totalprice, o_orderdate] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_quantity] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, 
home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_quantity] RaySQL Plan =========== @@ -60,15 +60,15 @@ RaySQL Plan Query Stage #0 (4 -> 4): ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) AggregateExec: mode=Partial, gby=[l_orderkey@0 as l_orderkey], aggr=[sum(lineitem.l_quantity)] - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_quantity] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_quantity] Query Stage #1 (4 -> 4): ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: 
[[home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:0..3391370], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:3391370..6782740], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:6782740..6784675, home/edmondo/Development/datafusion-ray/data/customer.parquet/part2.parquet:0..3389435], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part2.parquet:3389435..6780803]]}, projection=[c_custkey, c_name] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..3391370], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:3391370..6782740], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6782740..6784675, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..3389435], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:3389435..6780803]]}, projection=[c_custkey, c_name] Query Stage #2 (4 -> 4): ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "o_custkey", index: 1 }], 4)) - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_custkey, o_totalprice, o_orderdate] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], 
[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_custkey, o_totalprice, o_orderdate] Query Stage #3 (4 -> 4): ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "o_orderkey", index: 2 }], 4)) @@ -81,7 +81,7 @@ ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "o_orderk Query Stage #4 (4 -> 4): ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_quantity] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_quantity] Query Stage #5 (4 -> 4): 
ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "c_name", index: 0 }, Column { name: "c_custkey", index: 1 }, Column { name: "o_orderkey", index: 2 }, Column { name: "o_orderdate", index: 3 }, Column { name: "o_totalprice", index: 4 }], 4)) diff --git a/testdata/expected-plans/q19.txt b/testdata/expected-plans/q19.txt index 4571ed0..6f46143 100644 --- a/testdata/expected-plans/q19.txt +++ b/testdata/expected-plans/q19.txt @@ -25,13 +25,13 @@ ProjectionExec: expr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_disco CoalesceBatchesExec: target_batch_size=8192 FilterExec: (p_brand@1 = Brand#21 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("SM CASE") }, Literal { value: Utf8("SM BOX") }, Literal { value: Utf8("SM PACK") }, Literal { value: Utf8("SM PKG") }]) AND p_size@2 <= 5 OR p_brand@1 = Brand#13 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("MED BAG") }, Literal { value: Utf8("MED BOX") }, Literal { value: Utf8("MED PKG") }, Literal { value: Utf8("MED PACK") }]) AND p_size@2 <= 10 OR p_brand@1 = Brand#52 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("LG CASE") }, Literal { value: Utf8("LG BOX") }, Literal { value: Utf8("LG PACK") }, Literal { value: Utf8("LG PKG") }]) AND p_size@2 <= 15) AND p_size@2 >= 1 RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 - ParquetExec: file_groups={2 groups: [[home/edmondo/Development/datafusion-ray/data/part.parquet/part1.parquet], [home/edmondo/Development/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_brand, p_size, p_container], predicate=p_size@5 >= 1 AND (p_brand@3 = Brand#21 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("SM CASE") }, Literal { value: Utf8("SM BOX") }, Literal { value: Utf8("SM PACK") }, Literal { value: Utf8("SM PKG") }]) AND p_size@5 <= 5 OR p_brand@3 = Brand#13 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("MED BAG") }, Literal { value: Utf8("MED BOX") }, Literal { value: Utf8("MED 
PKG") }, Literal { value: Utf8("MED PACK") }]) AND p_size@5 <= 10 OR p_brand@3 = Brand#52 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("LG CASE") }, Literal { value: Utf8("LG BOX") }, Literal { value: Utf8("LG PACK") }, Literal { value: Utf8("LG PKG") }]) AND p_size@5 <= 15), pruning_predicate=CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_max@0 >= 1 END AND (CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#21 AND Brand#21 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM CASE AND SM CASE <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM BOX AND SM BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM PACK AND SM PACK <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM PKG AND SM PKG <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 5 END OR CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#13 AND Brand#13 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED BAG AND MED BAG <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED BOX AND MED BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED PKG AND MED PKG <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED PACK AND MED PACK <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false 
ELSE p_size_min@11 <= 10 END OR CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#52 AND Brand#52 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG CASE AND LG CASE <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG BOX AND LG BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG PACK AND LG PACK <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG PKG AND LG PKG <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 15 END), required_guarantees=[] + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_brand, p_size, p_container], predicate=p_size@5 >= 1 AND (p_brand@3 = Brand#21 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("SM CASE") }, Literal { value: Utf8("SM BOX") }, Literal { value: Utf8("SM PACK") }, Literal { value: Utf8("SM PKG") }]) AND p_size@5 <= 5 OR p_brand@3 = Brand#13 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("MED BAG") }, Literal { value: Utf8("MED BOX") }, Literal { value: Utf8("MED PKG") }, Literal { value: Utf8("MED PACK") }]) AND p_size@5 <= 10 OR p_brand@3 = Brand#52 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("LG CASE") }, Literal { value: Utf8("LG BOX") }, Literal { value: Utf8("LG PACK") }, Literal { value: Utf8("LG PKG") }]) AND p_size@5 <= 15), pruning_predicate=CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_max@0 >= 1 END AND (CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE 
p_brand_min@3 <= Brand#21 AND Brand#21 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM CASE AND SM CASE <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM BOX AND SM BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM PACK AND SM PACK <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM PKG AND SM PKG <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 5 END OR CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#13 AND Brand#13 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED BAG AND MED BAG <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED BOX AND MED BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED PKG AND MED PKG <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED PACK AND MED PACK <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 10 END OR CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#52 AND Brand#52 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG CASE AND LG CASE <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG BOX AND LG BOX <= p_container_max@8 END OR CASE WHEN 
p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG PACK AND LG PACK <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG PKG AND LG PKG <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 15 END), required_guarantees=[] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([l_partkey@0], 4), input_partitions=4 ProjectionExec: expr=[l_partkey@0 as l_partkey, l_quantity@1 as l_quantity, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount] CoalesceBatchesExec: target_batch_size=8192 FilterExec: (l_quantity@1 >= Some(800),11,2 AND l_quantity@1 <= Some(1800),11,2 OR l_quantity@1 >= Some(2000),11,2 AND l_quantity@1 <= Some(3000),11,2 OR l_quantity@1 >= Some(3000),11,2 AND l_quantity@1 <= Some(4000),11,2) AND (l_shipmode@5 = AIR OR l_shipmode@5 = AIR REG) AND l_shipinstruct@4 = DELIVER IN PERSON - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_partkey, l_quantity, l_extendedprice, l_discount, l_shipinstruct, l_shipmode], predicate=(l_shipmode@14 = AIR OR l_shipmode@14 = AIR REG) AND l_shipinstruct@13 = DELIVER IN PERSON AND (l_quantity@4 >= Some(800),11,2 AND l_quantity@4 <= Some(1800),11,2 OR l_quantity@4 >= Some(2000),11,2 AND l_quantity@4 <= Some(3000),11,2 OR l_quantity@4 >= Some(3000),11,2 AND l_quantity@4 <= Some(4000),11,2), pruning_predicate=(CASE WHEN l_shipmode_null_count@2 = 
l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= AIR AND AIR <= l_shipmode_max@1 END OR CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= AIR REG AND AIR REG <= l_shipmode_max@1 END) AND CASE WHEN l_shipinstruct_null_count@6 = l_shipinstruct_row_count@7 THEN false ELSE l_shipinstruct_min@4 <= DELIVER IN PERSON AND DELIVER IN PERSON <= l_shipinstruct_max@5 END AND (CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(800),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(1800),11,2 END OR CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(2000),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(3000),11,2 END OR CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(3000),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(4000),11,2 END), required_guarantees=[l_shipmode in (AIR REG, AIR), l_shipinstruct in (DELIVER IN PERSON)] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_partkey, l_quantity, l_extendedprice, l_discount, l_shipinstruct, l_shipmode], predicate=(l_shipmode@14 = AIR OR l_shipmode@14 = AIR REG) AND l_shipinstruct@13 = DELIVER IN PERSON AND (l_quantity@4 >= 
Some(800),11,2 AND l_quantity@4 <= Some(1800),11,2 OR l_quantity@4 >= Some(2000),11,2 AND l_quantity@4 <= Some(3000),11,2 OR l_quantity@4 >= Some(3000),11,2 AND l_quantity@4 <= Some(4000),11,2), pruning_predicate=(CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= AIR AND AIR <= l_shipmode_max@1 END OR CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= AIR REG AND AIR REG <= l_shipmode_max@1 END) AND CASE WHEN l_shipinstruct_null_count@6 = l_shipinstruct_row_count@7 THEN false ELSE l_shipinstruct_min@4 <= DELIVER IN PERSON AND DELIVER IN PERSON <= l_shipinstruct_max@5 END AND (CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(800),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(1800),11,2 END OR CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(2000),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(3000),11,2 END OR CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(3000),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(4000),11,2 END), required_guarantees=[l_shipmode in (AIR REG, AIR), l_shipinstruct in (DELIVER IN PERSON)] RaySQL Plan =========== @@ -40,14 +40,14 @@ Query Stage #0 (2 -> 4): ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) CoalesceBatchesExec: target_batch_size=8192 FilterExec: (p_brand@1 = Brand#21 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("SM CASE") }, Literal { value: Utf8("SM BOX") }, Literal { value: Utf8("SM PACK") }, Literal { value: Utf8("SM PKG") }]) AND p_size@2 <= 5 OR p_brand@1 = Brand#13 AND Use p_container@3 IN (SET) ([Literal { value: 
Utf8("MED BAG") }, Literal { value: Utf8("MED BOX") }, Literal { value: Utf8("MED PKG") }, Literal { value: Utf8("MED PACK") }]) AND p_size@2 <= 10 OR p_brand@1 = Brand#52 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("LG CASE") }, Literal { value: Utf8("LG BOX") }, Literal { value: Utf8("LG PACK") }, Literal { value: Utf8("LG PKG") }]) AND p_size@2 <= 15) AND p_size@2 >= 1 - ParquetExec: file_groups={2 groups: [[home/edmondo/Development/datafusion-ray/data/part.parquet/part1.parquet], [home/edmondo/Development/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_brand, p_size, p_container], predicate=p_size@5 >= 1 AND (p_brand@3 = Brand#21 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("SM CASE") }, Literal { value: Utf8("SM BOX") }, Literal { value: Utf8("SM PACK") }, Literal { value: Utf8("SM PKG") }]) AND p_size@5 <= 5 OR p_brand@3 = Brand#13 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("MED BAG") }, Literal { value: Utf8("MED BOX") }, Literal { value: Utf8("MED PKG") }, Literal { value: Utf8("MED PACK") }]) AND p_size@5 <= 10 OR p_brand@3 = Brand#52 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("LG CASE") }, Literal { value: Utf8("LG BOX") }, Literal { value: Utf8("LG PACK") }, Literal { value: Utf8("LG PKG") }]) AND p_size@5 <= 15), pruning_predicate=CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_max@0 >= 1 END AND (CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#21 AND Brand#21 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM CASE AND SM CASE <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM BOX AND SM BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM PACK AND SM PACK <= p_container_max@8 END 
OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM PKG AND SM PKG <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 5 END OR CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#13 AND Brand#13 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED BAG AND MED BAG <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED BOX AND MED BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED PKG AND MED PKG <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED PACK AND MED PACK <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 10 END OR CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#52 AND Brand#52 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG CASE AND LG CASE <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG BOX AND LG BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG PACK AND LG PACK <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG PKG AND LG PKG <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 15 END), required_guarantees=[] + ParquetExec: file_groups={2 groups: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_brand, p_size, p_container], predicate=p_size@5 >= 1 AND (p_brand@3 = Brand#21 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("SM CASE") }, Literal { value: Utf8("SM BOX") }, Literal { value: Utf8("SM PACK") }, Literal { value: Utf8("SM PKG") }]) AND p_size@5 <= 5 OR p_brand@3 = Brand#13 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("MED BAG") }, Literal { value: Utf8("MED BOX") }, Literal { value: Utf8("MED PKG") }, Literal { value: Utf8("MED PACK") }]) AND p_size@5 <= 10 OR p_brand@3 = Brand#52 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("LG CASE") }, Literal { value: Utf8("LG BOX") }, Literal { value: Utf8("LG PACK") }, Literal { value: Utf8("LG PKG") }]) AND p_size@5 <= 15), pruning_predicate=CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_max@0 >= 1 END AND (CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#21 AND Brand#21 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM CASE AND SM CASE <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM BOX AND SM BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM PACK AND SM PACK <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM PKG AND SM PKG <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 5 END OR CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#13 AND Brand#13 <= p_brand_max@4 END AND (CASE WHEN 
p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED BAG AND MED BAG <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED BOX AND MED BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED PKG AND MED PKG <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED PACK AND MED PACK <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 10 END OR CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#52 AND Brand#52 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG CASE AND LG CASE <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG BOX AND LG BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG PACK AND LG PACK <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG PKG AND LG PKG <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 15 END), required_guarantees=[] Query Stage #1 (4 -> 4): ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "l_partkey", index: 0 }], 4)) ProjectionExec: expr=[l_partkey@0 as l_partkey, l_quantity@1 as l_quantity, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount] CoalesceBatchesExec: target_batch_size=8192 FilterExec: (l_quantity@1 >= Some(800),11,2 AND l_quantity@1 <= Some(1800),11,2 OR l_quantity@1 >= Some(2000),11,2 AND l_quantity@1 <= Some(3000),11,2 OR l_quantity@1 >= 
Some(3000),11,2 AND l_quantity@1 <= Some(4000),11,2) AND (l_shipmode@5 = AIR OR l_shipmode@5 = AIR REG) AND l_shipinstruct@4 = DELIVER IN PERSON - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_partkey, l_quantity, l_extendedprice, l_discount, l_shipinstruct, l_shipmode], predicate=(l_shipmode@14 = AIR OR l_shipmode@14 = AIR REG) AND l_shipinstruct@13 = DELIVER IN PERSON AND (l_quantity@4 >= Some(800),11,2 AND l_quantity@4 <= Some(1800),11,2 OR l_quantity@4 >= Some(2000),11,2 AND l_quantity@4 <= Some(3000),11,2 OR l_quantity@4 >= Some(3000),11,2 AND l_quantity@4 <= Some(4000),11,2), pruning_predicate=(CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= AIR AND AIR <= l_shipmode_max@1 END OR CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= AIR REG AND AIR REG <= l_shipmode_max@1 END) AND CASE WHEN l_shipinstruct_null_count@6 = l_shipinstruct_row_count@7 THEN false ELSE l_shipinstruct_min@4 <= DELIVER IN PERSON AND DELIVER IN PERSON <= l_shipinstruct_max@5 END AND (CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(800),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(1800),11,2 END OR CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(2000),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= 
Some(3000),11,2 END OR CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(3000),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(4000),11,2 END), required_guarantees=[l_shipmode in (AIR REG, AIR), l_shipinstruct in (DELIVER IN PERSON)] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_partkey, l_quantity, l_extendedprice, l_discount, l_shipinstruct, l_shipmode], predicate=(l_shipmode@14 = AIR OR l_shipmode@14 = AIR REG) AND l_shipinstruct@13 = DELIVER IN PERSON AND (l_quantity@4 >= Some(800),11,2 AND l_quantity@4 <= Some(1800),11,2 OR l_quantity@4 >= Some(2000),11,2 AND l_quantity@4 <= Some(3000),11,2 OR l_quantity@4 >= Some(3000),11,2 AND l_quantity@4 <= Some(4000),11,2), pruning_predicate=(CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= AIR AND AIR <= l_shipmode_max@1 END OR CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= AIR REG AND AIR REG <= l_shipmode_max@1 END) AND CASE WHEN l_shipinstruct_null_count@6 = l_shipinstruct_row_count@7 THEN false ELSE l_shipinstruct_min@4 <= DELIVER IN PERSON AND DELIVER IN PERSON <= l_shipinstruct_max@5 END AND (CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(800),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 
<= Some(1800),11,2 END OR CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(2000),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(3000),11,2 END OR CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(3000),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(4000),11,2 END), required_guarantees=[l_shipmode in (AIR REG, AIR), l_shipinstruct in (DELIVER IN PERSON)] Query Stage #2 (4 -> 1): ShuffleWriterExec(stage_id=2, output_partitioning=Hash([], 4)) diff --git a/testdata/expected-plans/q2.txt b/testdata/expected-plans/q2.txt index 9b08cf5..16213f0 100644 --- a/testdata/expected-plans/q2.txt +++ b/testdata/expected-plans/q2.txt @@ -57,7 +57,7 @@ GlobalLimitExec: skip=0, fetch=100 ProjectionExec: expr=[r_regionkey@0 as r_regionkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: r_name@1 = ASIA - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/region.parquet/part1.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = ASIA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= ASIA AND ASIA <= r_name_max@1 END, required_guarantees=[r_name in (ASIA)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet/part1.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = ASIA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= ASIA AND ASIA <= r_name_max@1 END, required_guarantees=[r_name in (ASIA)] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([n_regionkey@9], 4), input_partitions=4 ProjectionExec: expr=[p_partkey@2 as p_partkey, p_mfgr@3 as p_mfgr, s_name@4 as s_name, s_address@5 as s_address, s_phone@6 as 
s_phone, s_acctbal@7 as s_acctbal, s_comment@8 as s_comment, ps_supplycost@9 as ps_supplycost, n_name@0 as n_name, n_regionkey@1 as n_regionkey] @@ -65,7 +65,7 @@ GlobalLimitExec: skip=0, fetch=100 HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@4)], projection=[n_name@1, n_regionkey@2, p_partkey@3, p_mfgr@4, s_name@5, s_address@6, s_phone@8, s_acctbal@9, s_comment@10, ps_supplycost@11] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=1 - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name, n_regionkey] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name, n_regionkey] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([s_nationkey@4], 4), input_partitions=4 ProjectionExec: expr=[p_partkey@6 as p_partkey, p_mfgr@7 as p_mfgr, s_name@0 as s_name, s_address@1 as s_address, s_nationkey@2 as s_nationkey, s_phone@3 as s_phone, s_acctbal@4 as s_acctbal, s_comment@5 as s_comment, ps_supplycost@8 as ps_supplycost] @@ -74,7 +74,7 @@ GlobalLimitExec: skip=0, fetch=100 CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=4 RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 - ParquetExec: file_groups={2 groups: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet/part1.parquet], [home/edmondo/Development/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment] + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, 
projection=[s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([ps_suppkey@2], 4), input_partitions=4 CoalesceBatchesExec: target_batch_size=8192 @@ -85,10 +85,10 @@ GlobalLimitExec: skip=0, fetch=100 CoalesceBatchesExec: target_batch_size=8192 FilterExec: p_size@3 = 48 AND p_type@2 LIKE %TIN RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 - ParquetExec: file_groups={2 groups: [[home/edmondo/Development/datafusion-ray/data/part.parquet/part1.parquet], [home/edmondo/Development/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_mfgr, p_type, p_size], predicate=p_size@5 = 48 AND p_type@4 LIKE %TIN, pruning_predicate=CASE WHEN p_size_null_count@2 = p_size_row_count@3 THEN false ELSE p_size_min@0 <= 48 AND 48 <= p_size_max@1 END, required_guarantees=[p_size in (48)] + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_mfgr, p_type, p_size], predicate=p_size@5 = 48 AND p_type@4 LIKE %TIN, pruning_predicate=CASE WHEN p_size_null_count@2 = p_size_row_count@3 THEN false ELSE p_size_min@0 <= 48 AND 48 <= p_size_max@1 END, required_guarantees=[p_size in (48)] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([ps_partkey@0], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10449977], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part1.parquet:10449977..20896603, home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:0..3351], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:3351..10453328], 
[home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:10453328..20903305]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10449977], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:10449977..20896603, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..3351], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:3351..10453328], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:10453328..20903305]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([ps_partkey@1, min(partsupp.ps_supplycost)@0], 4), input_partitions=4 ProjectionExec: expr=[min(partsupp.ps_supplycost)@1 as min(partsupp.ps_supplycost), ps_partkey@0 as ps_partkey] @@ -104,7 +104,7 @@ GlobalLimitExec: skip=0, fetch=100 ProjectionExec: expr=[r_regionkey@0 as r_regionkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: r_name@1 = ASIA - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/region.parquet/part1.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = ASIA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= ASIA AND ASIA <= r_name_max@1 END, required_guarantees=[r_name in (ASIA)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet/part1.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = ASIA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= ASIA AND ASIA <= r_name_max@1 END, required_guarantees=[r_name in (ASIA)] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([n_regionkey@2], 4), 
input_partitions=4 ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_supplycost@2 as ps_supplycost, n_regionkey@0 as n_regionkey] @@ -112,7 +112,7 @@ GlobalLimitExec: skip=0, fetch=100 HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[n_regionkey@1, ps_partkey@2, ps_supplycost@3] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=1 - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_regionkey] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_regionkey] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([s_nationkey@2], 4), input_partitions=4 ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_supplycost@2 as ps_supplycost, s_nationkey@0 as s_nationkey] @@ -121,10 +121,10 @@ GlobalLimitExec: skip=0, fetch=100 CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=4 RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 - ParquetExec: file_groups={2 groups: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet/part1.parquet], [home/edmondo/Development/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([ps_suppkey@1], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10449977], 
[home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part1.parquet:10449977..20896603, home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:0..3351], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:3351..10453328], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:10453328..20903305]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10449977], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:10449977..20896603, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..3351], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:3351..10453328], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:10453328..20903305]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] RaySQL Plan =========== @@ -134,26 +134,26 @@ ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "r_region ProjectionExec: expr=[r_regionkey@0 as r_regionkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: r_name@1 = ASIA - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/region.parquet/part1.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = ASIA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= ASIA AND ASIA <= r_name_max@1 END, required_guarantees=[r_name in (ASIA)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet/part1.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = ASIA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= ASIA AND ASIA <= r_name_max@1 END, required_guarantees=[r_name in (ASIA)] Query Stage #1 
(1 -> 4): ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name, n_regionkey] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name, n_regionkey] Query Stage #2 (2 -> 4): ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) - ParquetExec: file_groups={2 groups: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet/part1.parquet], [home/edmondo/Development/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment] + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment] Query Stage #3 (2 -> 4): ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) ProjectionExec: expr=[p_partkey@0 as p_partkey, p_mfgr@1 as p_mfgr] CoalesceBatchesExec: target_batch_size=8192 FilterExec: p_size@3 = 48 AND p_type@2 LIKE %TIN - ParquetExec: file_groups={2 groups: [[home/edmondo/Development/datafusion-ray/data/part.parquet/part1.parquet], [home/edmondo/Development/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_mfgr, p_type, p_size], predicate=p_size@5 = 48 AND p_type@4 LIKE %TIN, pruning_predicate=CASE WHEN p_size_null_count@2 = p_size_row_count@3 THEN false ELSE p_size_min@0 <= 48 AND 48 <= p_size_max@1 END, required_guarantees=[p_size in (48)] + ParquetExec: file_groups={2 groups: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_mfgr, p_type, p_size], predicate=p_size@5 = 48 AND p_type@4 LIKE %TIN, pruning_predicate=CASE WHEN p_size_null_count@2 = p_size_row_count@3 THEN false ELSE p_size_min@0 <= 48 AND 48 <= p_size_max@1 END, required_guarantees=[p_size in (48)] Query Stage #4 (4 -> 4): ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10449977], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part1.parquet:10449977..20896603, home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:0..3351], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:3351..10453328], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:10453328..20903305]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10449977], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:10449977..20896603, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..3351], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:3351..10453328], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:10453328..20903305]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] Query Stage #5 (4 -> 4): ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "ps_suppkey", index: 2 }], 4)) @@ -198,19 +198,19 @@ ShuffleWriterExec(stage_id=9, output_partitioning=Hash([Column { name: "r_region ProjectionExec: expr=[r_regionkey@0 as r_regionkey] 
CoalesceBatchesExec: target_batch_size=8192 FilterExec: r_name@1 = ASIA - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/region.parquet/part1.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = ASIA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= ASIA AND ASIA <= r_name_max@1 END, required_guarantees=[r_name in (ASIA)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet/part1.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = ASIA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= ASIA AND ASIA <= r_name_max@1 END, required_guarantees=[r_name in (ASIA)] Query Stage #10 (1 -> 4): ShuffleWriterExec(stage_id=10, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_regionkey] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_regionkey] Query Stage #11 (2 -> 4): ShuffleWriterExec(stage_id=11, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) - ParquetExec: file_groups={2 groups: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet/part1.parquet], [home/edmondo/Development/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] Query Stage #12 (4 -> 4): ShuffleWriterExec(stage_id=12, output_partitioning=Hash([Column { name: "ps_suppkey", index: 1 }], 4)) - ParquetExec: 
file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10449977], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part1.parquet:10449977..20896603, home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:0..3351], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:3351..10453328], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:10453328..20903305]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10449977], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:10449977..20896603, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..3351], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:3351..10453328], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:10453328..20903305]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] Query Stage #13 (4 -> 4): ShuffleWriterExec(stage_id=13, output_partitioning=Hash([Column { name: "s_nationkey", index: 2 }], 4)) diff --git a/testdata/expected-plans/q20.txt b/testdata/expected-plans/q20.txt index 8c9f1b5..9e495cc 100644 --- a/testdata/expected-plans/q20.txt +++ b/testdata/expected-plans/q20.txt @@ -43,11 +43,11 @@ SortPreservingMergeExec: [s_name@0 ASC NULLS LAST] ProjectionExec: expr=[n_nationkey@0 as n_nationkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: n_name@1 = KENYA - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = KENYA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= KENYA AND KENYA <= n_name_max@1 END, required_guarantees=[n_name in 
(KENYA)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = KENYA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= KENYA AND KENYA <= n_name_max@1 END, required_guarantees=[n_name in (KENYA)] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([s_nationkey@3], 4), input_partitions=4 RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 - ParquetExec: file_groups={2 groups: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet/part1.parquet], [home/edmondo/Development/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_name, s_address, s_nationkey] + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_name, s_address, s_nationkey] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([ps_suppkey@0], 4), input_partitions=4 CoalesceBatchesExec: target_batch_size=8192 @@ -62,10 +62,10 @@ SortPreservingMergeExec: [s_name@0 ASC NULLS LAST] CoalesceBatchesExec: target_batch_size=8192 FilterExec: p_name@1 LIKE blanched% RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 - ParquetExec: file_groups={2 groups: [[home/edmondo/Development/datafusion-ray/data/part.parquet/part1.parquet], [home/edmondo/Development/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_name], predicate=p_name@1 LIKE blanched% + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_name], predicate=p_name@1 LIKE blanched% 
CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([ps_partkey@0], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10449977], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part1.parquet:10449977..20896603, home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:0..3351], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:3351..10453328], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:10453328..20903305]]}, projection=[ps_partkey, ps_suppkey, ps_availqty] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10449977], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:10449977..20896603, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..3351], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:3351..10453328], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:10453328..20903305]]}, projection=[ps_partkey, ps_suppkey, ps_availqty] ProjectionExec: expr=[0.5 * CAST(sum(lineitem.l_quantity)@2 AS Float64) as Float64(0.5) * sum(lineitem.l_quantity), l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey] AggregateExec: mode=FinalPartitioned, gby=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey], aggr=[sum(lineitem.l_quantity)] CoalesceBatchesExec: target_batch_size=8192 @@ -74,7 +74,7 @@ SortPreservingMergeExec: [s_name@0 ASC NULLS LAST] ProjectionExec: expr=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey, l_quantity@2 as l_quantity] CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_shipdate@3 >= 1993-01-01 AND l_shipdate@3 < 1994-01-01 - ParquetExec: file_groups={4 groups: 
[[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_partkey, l_suppkey, l_quantity, l_shipdate], predicate=l_shipdate@10 >= 1993-01-01 AND l_shipdate@10 < 1994-01-01, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1993-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1994-01-01 END, required_guarantees=[] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_partkey, l_suppkey, l_quantity, l_shipdate], predicate=l_shipdate@10 >= 1993-01-01 AND l_shipdate@10 < 1994-01-01, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1993-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1994-01-01 END, required_guarantees=[] RaySQL Plan =========== @@ -84,11 +84,11 @@ ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "n_nation ProjectionExec: expr=[n_nationkey@0 as n_nationkey] 
CoalesceBatchesExec: target_batch_size=8192 FilterExec: n_name@1 = KENYA - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = KENYA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= KENYA AND KENYA <= n_name_max@1 END, required_guarantees=[n_name in (KENYA)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = KENYA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= KENYA AND KENYA <= n_name_max@1 END, required_guarantees=[n_name in (KENYA)] Query Stage #1 (2 -> 4): ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "s_nationkey", index: 3 }], 4)) - ParquetExec: file_groups={2 groups: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet/part1.parquet], [home/edmondo/Development/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_name, s_address, s_nationkey] + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_name, s_address, s_nationkey] Query Stage #2 (4 -> 4): ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) @@ -104,11 +104,11 @@ ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "p_partke ProjectionExec: expr=[p_partkey@0 as p_partkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: p_name@1 LIKE blanched% - ParquetExec: file_groups={2 groups: [[home/edmondo/Development/datafusion-ray/data/part.parquet/part1.parquet], [home/edmondo/Development/datafusion-ray/data/part.parquet/part2.parquet]]}, 
projection=[p_partkey, p_name], predicate=p_name@1 LIKE blanched% + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_name], predicate=p_name@1 LIKE blanched% Query Stage #4 (4 -> 4): ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10449977], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part1.parquet:10449977..20896603, home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:0..3351], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:3351..10453328], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:10453328..20903305]]}, projection=[ps_partkey, ps_suppkey, ps_availqty] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10449977], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:10449977..20896603, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..3351], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:3351..10453328], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:10453328..20903305]]}, projection=[ps_partkey, ps_suppkey, ps_availqty] Query Stage #5 (4 -> 4): ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "ps_partkey", index: 0 }, Column { name: "ps_suppkey", index: 1 }], 4)) @@ -125,7 +125,7 @@ ShuffleWriterExec(stage_id=6, output_partitioning=Hash([Column { name: "l_partke ProjectionExec: expr=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey, l_quantity@2 as l_quantity] 
CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_shipdate@3 >= 1993-01-01 AND l_shipdate@3 < 1994-01-01 - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_partkey, l_suppkey, l_quantity, l_shipdate], predicate=l_shipdate@10 >= 1993-01-01 AND l_shipdate@10 < 1994-01-01, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1993-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1994-01-01 END, required_guarantees=[] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_partkey, l_suppkey, l_quantity, l_shipdate], predicate=l_shipdate@10 >= 1993-01-01 AND l_shipdate@10 < 1994-01-01, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1993-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1994-01-01 END, required_guarantees=[] Query Stage #7 (4 -> 4): 
ShuffleWriterExec(stage_id=7, output_partitioning=Hash([Column { name: "ps_suppkey", index: 0 }], 4)) diff --git a/testdata/expected-plans/q21.txt b/testdata/expected-plans/q21.txt index c2390e8..f4a9f43 100644 --- a/testdata/expected-plans/q21.txt +++ b/testdata/expected-plans/q21.txt @@ -59,7 +59,7 @@ GlobalLimitExec: skip=0, fetch=100 ProjectionExec: expr=[n_nationkey@0 as n_nationkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: n_name@1 = ARGENTINA - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ARGENTINA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ARGENTINA AND ARGENTINA <= n_name_max@1 END, required_guarantees=[n_name in (ARGENTINA)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ARGENTINA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ARGENTINA AND ARGENTINA <= n_name_max@1 END, required_guarantees=[n_name in (ARGENTINA)] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([s_nationkey@1], 4), input_partitions=4 CoalesceBatchesExec: target_batch_size=8192 @@ -69,7 +69,7 @@ GlobalLimitExec: skip=0, fetch=100 ProjectionExec: expr=[o_orderkey@0 as o_orderkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: o_orderstatus@1 = F - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], 
[home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_orderstatus], predicate=o_orderstatus@2 = F, pruning_predicate=CASE WHEN o_orderstatus_null_count@2 = o_orderstatus_row_count@3 THEN false ELSE o_orderstatus_min@0 <= F AND F <= o_orderstatus_max@1 END, required_guarantees=[o_orderstatus in (F)] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_orderstatus], predicate=o_orderstatus@2 = F, pruning_predicate=CASE WHEN o_orderstatus_null_count@2 = o_orderstatus_row_count@3 THEN false ELSE o_orderstatus_min@0 <= F AND F <= o_orderstatus_max@1 END, required_guarantees=[o_orderstatus in (F)] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([l_orderkey@2], 4), input_partitions=4 CoalesceBatchesExec: target_batch_size=8192 @@ -77,22 +77,22 @@ GlobalLimitExec: skip=0, fetch=100 CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=4 RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 - ParquetExec: file_groups={2 groups: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet/part1.parquet], [home/edmondo/Development/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_name, s_nationkey] + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], 
[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_name, s_nationkey] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([l_suppkey@1], 4), input_partitions=4 ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_suppkey@1 as l_suppkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_receiptdate@3 > l_commitdate@2 - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], 
[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_suppkey] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_suppkey] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4 ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_suppkey@1 as l_suppkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_receiptdate@3 > l_commitdate@2 - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 + ParquetExec: file_groups={4 groups: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 RaySQL Plan =========== @@ -102,25 +102,25 @@ ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "n_nation ProjectionExec: expr=[n_nationkey@0 as n_nationkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: n_name@1 = ARGENTINA - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ARGENTINA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ARGENTINA AND ARGENTINA <= n_name_max@1 END, required_guarantees=[n_name in (ARGENTINA)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ARGENTINA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ARGENTINA AND ARGENTINA <= n_name_max@1 END, required_guarantees=[n_name in (ARGENTINA)] Query Stage #1 (4 -> 4): ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) ProjectionExec: expr=[o_orderkey@0 as o_orderkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: o_orderstatus@1 = F - ParquetExec: file_groups={4 groups: 
[[home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_orderstatus], predicate=o_orderstatus@2 = F, pruning_predicate=CASE WHEN o_orderstatus_null_count@2 = o_orderstatus_row_count@3 THEN false ELSE o_orderstatus_min@0 <= F AND F <= o_orderstatus_max@1 END, required_guarantees=[o_orderstatus in (F)] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_orderstatus], predicate=o_orderstatus@2 = F, pruning_predicate=CASE WHEN o_orderstatus_null_count@2 = o_orderstatus_row_count@3 THEN false ELSE o_orderstatus_min@0 <= F AND F <= o_orderstatus_max@1 END, required_guarantees=[o_orderstatus in (F)] Query Stage #2 (2 -> 4): ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) - ParquetExec: file_groups={2 groups: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet/part1.parquet], [home/edmondo/Development/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_name, s_nationkey] + ParquetExec: file_groups={2 groups: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_name, s_nationkey] Query Stage #3 (4 -> 4): ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "l_suppkey", index: 1 }], 4)) ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_suppkey@1 as l_suppkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_receiptdate@3 > l_commitdate@2 - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 Query Stage #4 (4 -> 4): ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "l_orderkey", index: 2 }], 4)) @@ -151,14 +151,14 @@ ShuffleWriterExec(stage_id=6, output_partitioning=Hash([Column 
{ name: "l_orderk Query Stage #7 (4 -> 4): ShuffleWriterExec(stage_id=7, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_suppkey] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_suppkey] Query Stage #8 (4 -> 4): ShuffleWriterExec(stage_id=8, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_suppkey@1 as l_suppkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_receiptdate@3 > l_commitdate@2 - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], 
[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 Query Stage #9 (4 -> 4): ShuffleWriterExec(stage_id=9, output_partitioning=Hash([Column { name: "s_name", index: 0 }], 4)) diff --git a/testdata/expected-plans/q22.txt b/testdata/expected-plans/q22.txt index b091c31..b770c11 100644 --- a/testdata/expected-plans/q22.txt +++ b/testdata/expected-plans/q22.txt @@ -37,17 +37,17 @@ SortPreservingMergeExec: [cntrycode@0 ASC NULLS LAST] ProjectionExec: expr=[c_acctbal@1 as c_acctbal] CoalesceBatchesExec: target_batch_size=8192 FilterExec: c_acctbal@1 > Some(0),11,2 AND Use substr(c_phone@0, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:0..3391370], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:3391370..6782740], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:6782740..6784675, 
home/edmondo/Development/datafusion-ray/data/customer.parquet/part2.parquet:0..3389435], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part2.parquet:3389435..6780803]]}, projection=[c_phone, c_acctbal], predicate=c_acctbal@5 > Some(0),11,2 AND Use substr(c_phone@4, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) AND c_acctbal@5 > Some(0),11,2, pruning_predicate=CASE WHEN c_acctbal_null_count@1 = c_acctbal_row_count@2 THEN false ELSE c_acctbal_max@0 > Some(0),11,2 END AND CASE WHEN c_acctbal_null_count@1 = c_acctbal_row_count@2 THEN false ELSE c_acctbal_max@0 > Some(0),11,2 END, required_guarantees=[] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..3391370], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:3391370..6782740], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6782740..6784675, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..3389435], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:3389435..6780803]]}, projection=[c_phone, c_acctbal], predicate=c_acctbal@5 > Some(0),11,2 AND Use substr(c_phone@4, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) AND c_acctbal@5 > Some(0),11,2, pruning_predicate=CASE WHEN c_acctbal_null_count@1 = c_acctbal_row_count@2 THEN false ELSE c_acctbal_max@0 > Some(0),11,2 END AND CASE WHEN c_acctbal_null_count@1 = c_acctbal_row_count@2 THEN false ELSE c_acctbal_max@0 > Some(0),11,2 END, required_guarantees=[] CoalesceBatchesExec: 
target_batch_size=8192 HashJoinExec: mode=Partitioned, join_type=LeftAnti, on=[(c_custkey@0, o_custkey@0)], projection=[c_phone@1, c_acctbal@2] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([c_custkey@0], 4), input_partitions=4 CoalesceBatchesExec: target_batch_size=8192 FilterExec: Use substr(c_phone@1, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:0..3391370], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:3391370..6782740], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:6782740..6784675, home/edmondo/Development/datafusion-ray/data/customer.parquet/part2.parquet:0..3389435], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part2.parquet:3389435..6780803]]}, projection=[c_custkey, c_phone, c_acctbal], predicate=Use substr(c_phone@4, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..3391370], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:3391370..6782740], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6782740..6784675, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..3389435], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:3389435..6780803]]}, projection=[c_custkey, c_phone, c_acctbal], predicate=Use 
substr(c_phone@4, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([o_custkey@0], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_custkey] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_custkey] RaySQL Plan =========== @@ -58,17 +58,17 @@ ShuffleWriterExec(stage_id=0, output_partitioning=UnknownPartitioning(4)) ProjectionExec: expr=[c_acctbal@1 as c_acctbal] CoalesceBatchesExec: target_batch_size=8192 FilterExec: c_acctbal@1 > Some(0),11,2 AND Use substr(c_phone@0, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) - ParquetExec: 
file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:0..3391370], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:3391370..6782740], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:6782740..6784675, home/edmondo/Development/datafusion-ray/data/customer.parquet/part2.parquet:0..3389435], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part2.parquet:3389435..6780803]]}, projection=[c_phone, c_acctbal], predicate=c_acctbal@5 > Some(0),11,2 AND Use substr(c_phone@4, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) AND c_acctbal@5 > Some(0),11,2, pruning_predicate=CASE WHEN c_acctbal_null_count@1 = c_acctbal_row_count@2 THEN false ELSE c_acctbal_max@0 > Some(0),11,2 END AND CASE WHEN c_acctbal_null_count@1 = c_acctbal_row_count@2 THEN false ELSE c_acctbal_max@0 > Some(0),11,2 END, required_guarantees=[] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..3391370], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:3391370..6782740], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6782740..6784675, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..3389435], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:3389435..6780803]]}, projection=[c_phone, c_acctbal], predicate=c_acctbal@5 > Some(0),11,2 AND Use substr(c_phone@4, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) AND 
c_acctbal@5 > Some(0),11,2, pruning_predicate=CASE WHEN c_acctbal_null_count@1 = c_acctbal_row_count@2 THEN false ELSE c_acctbal_max@0 > Some(0),11,2 END AND CASE WHEN c_acctbal_null_count@1 = c_acctbal_row_count@2 THEN false ELSE c_acctbal_max@0 > Some(0),11,2 END, required_guarantees=[] Query Stage #1 (4 -> 4): ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) CoalesceBatchesExec: target_batch_size=8192 FilterExec: Use substr(c_phone@1, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:0..3391370], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:3391370..6782740], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:6782740..6784675, home/edmondo/Development/datafusion-ray/data/customer.parquet/part2.parquet:0..3389435], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part2.parquet:3389435..6780803]]}, projection=[c_custkey, c_phone, c_acctbal], predicate=Use substr(c_phone@4, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..3391370], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:3391370..6782740], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6782740..6784675, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..3389435], 
[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:3389435..6780803]]}, projection=[c_custkey, c_phone, c_acctbal], predicate=Use substr(c_phone@4, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) Query Stage #2 (4 -> 4): ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "o_custkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_custkey] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_custkey] Query Stage #3 (4 -> 4): ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "cntrycode", index: 0 }], 4)) diff --git a/testdata/expected-plans/q3.txt b/testdata/expected-plans/q3.txt index b4cc717..2dc9f64 100644 --- a/testdata/expected-plans/q3.txt +++ b/testdata/expected-plans/q3.txt @@ -40,18 +40,18 @@ GlobalLimitExec: skip=0, fetch=10 
ProjectionExec: expr=[c_custkey@0 as c_custkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: c_mktsegment@1 = BUILDING - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:0..3391370], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:3391370..6782740], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:6782740..6784675, home/edmondo/Development/datafusion-ray/data/customer.parquet/part2.parquet:0..3389435], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part2.parquet:3389435..6780803]]}, projection=[c_custkey, c_mktsegment], predicate=c_mktsegment@6 = BUILDING, pruning_predicate=CASE WHEN c_mktsegment_null_count@2 = c_mktsegment_row_count@3 THEN false ELSE c_mktsegment_min@0 <= BUILDING AND BUILDING <= c_mktsegment_max@1 END, required_guarantees=[c_mktsegment in (BUILDING)] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..3391370], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:3391370..6782740], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6782740..6784675, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..3389435], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:3389435..6780803]]}, projection=[c_custkey, c_mktsegment], predicate=c_mktsegment@6 = BUILDING, pruning_predicate=CASE WHEN c_mktsegment_null_count@2 = c_mktsegment_row_count@3 THEN false ELSE c_mktsegment_min@0 <= BUILDING AND BUILDING <= c_mktsegment_max@1 END, required_guarantees=[c_mktsegment in (BUILDING)] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([o_custkey@1], 4), input_partitions=4 CoalesceBatchesExec: target_batch_size=8192 FilterExec: o_orderdate@2 < 1995-03-15 - ParquetExec: file_groups={4 
groups: [[home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_custkey, o_orderdate, o_shippriority], predicate=o_orderdate@4 < 1995-03-15, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@0 < 1995-03-15 END, required_guarantees=[] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_custkey, o_orderdate, o_shippriority], predicate=o_orderdate@4 < 1995-03-15, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@0 < 1995-03-15 END, required_guarantees=[] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4 ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_shipdate@3 > 1995-03-15 - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], 
[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 > 1995-03-15, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 > 1995-03-15 END, required_guarantees=[] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 > 1995-03-15, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 > 1995-03-15 END, required_guarantees=[] RaySQL Plan =========== @@ -61,13 +61,13 @@ ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "c_custke ProjectionExec: expr=[c_custkey@0 as c_custkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: c_mktsegment@1 = BUILDING - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:0..3391370], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:3391370..6782740], 
[home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:6782740..6784675, home/edmondo/Development/datafusion-ray/data/customer.parquet/part2.parquet:0..3389435], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part2.parquet:3389435..6780803]]}, projection=[c_custkey, c_mktsegment], predicate=c_mktsegment@6 = BUILDING, pruning_predicate=CASE WHEN c_mktsegment_null_count@2 = c_mktsegment_row_count@3 THEN false ELSE c_mktsegment_min@0 <= BUILDING AND BUILDING <= c_mktsegment_max@1 END, required_guarantees=[c_mktsegment in (BUILDING)] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..3391370], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:3391370..6782740], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6782740..6784675, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..3389435], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:3389435..6780803]]}, projection=[c_custkey, c_mktsegment], predicate=c_mktsegment@6 = BUILDING, pruning_predicate=CASE WHEN c_mktsegment_null_count@2 = c_mktsegment_row_count@3 THEN false ELSE c_mktsegment_min@0 <= BUILDING AND BUILDING <= c_mktsegment_max@1 END, required_guarantees=[c_mktsegment in (BUILDING)] Query Stage #1 (4 -> 4): ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "o_custkey", index: 1 }], 4)) CoalesceBatchesExec: target_batch_size=8192 FilterExec: o_orderdate@2 < 1995-03-15 - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, 
home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_custkey, o_orderdate, o_shippriority], predicate=o_orderdate@4 < 1995-03-15, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@0 < 1995-03-15 END, required_guarantees=[] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_custkey, o_orderdate, o_shippriority], predicate=o_orderdate@4 < 1995-03-15, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@0 < 1995-03-15 END, required_guarantees=[] Query Stage #2 (4 -> 4): ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) @@ -83,7 +83,7 @@ ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "l_orderk ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_shipdate@3 > 1995-03-15 - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], 
[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 > 1995-03-15, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 > 1995-03-15 END, required_guarantees=[] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 > 1995-03-15, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 > 1995-03-15 END, required_guarantees=[] Query Stage #4 (4 -> 4): ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }, Column { name: "o_orderdate", index: 1 }, Column { name: "o_shippriority", index: 2 }], 4)) diff --git a/testdata/expected-plans/q4.txt b/testdata/expected-plans/q4.txt index c15d906..84642bd 100644 --- a/testdata/expected-plans/q4.txt +++ b/testdata/expected-plans/q4.txt @@ -31,13 +31,13 @@ SortPreservingMergeExec: [o_orderpriority@0 ASC NULLS LAST] ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_orderpriority@2 as o_orderpriority] CoalesceBatchesExec: target_batch_size=8192 FilterExec: o_orderdate@1 >= 1995-04-01 AND o_orderdate@1 < 1995-07-01 - ParquetExec: file_groups={4 groups: 
[[home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_orderdate, o_orderpriority], predicate=o_orderdate@4 >= 1995-04-01 AND o_orderdate@4 < 1995-07-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1995-04-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1995-07-01 END, required_guarantees=[] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_orderdate, o_orderpriority], predicate=o_orderdate@4 >= 1995-04-01 AND o_orderdate@4 < 1995-07-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1995-04-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1995-07-01 END, required_guarantees=[] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4 ProjectionExec: expr=[l_orderkey@0 as l_orderkey] CoalesceBatchesExec: 
target_batch_size=8192 FilterExec: l_receiptdate@2 > l_commitdate@1 - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 RaySQL Plan =========== @@ -47,14 +47,14 @@ ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "o_orderk ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_orderpriority@2 as o_orderpriority] CoalesceBatchesExec: target_batch_size=8192 FilterExec: o_orderdate@1 >= 1995-04-01 AND o_orderdate@1 < 1995-07-01 - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, 
home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_orderdate, o_orderpriority], predicate=o_orderdate@4 >= 1995-04-01 AND o_orderdate@4 < 1995-07-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1995-04-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1995-07-01 END, required_guarantees=[] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_orderdate, o_orderpriority], predicate=o_orderdate@4 >= 1995-04-01 AND o_orderdate@4 < 1995-07-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1995-04-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1995-07-01 END, required_guarantees=[] Query Stage #1 (4 -> 4): ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) ProjectionExec: expr=[l_orderkey@0 as l_orderkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_receiptdate@2 > l_commitdate@1 - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], 
[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 Query Stage #2 (4 -> 4): ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "o_orderpriority", index: 0 }], 4)) diff --git a/testdata/expected-plans/q5.txt b/testdata/expected-plans/q5.txt index bf23579..a0a761d 100644 --- a/testdata/expected-plans/q5.txt +++ b/testdata/expected-plans/q5.txt @@ -43,7 +43,7 @@ SortPreservingMergeExec: [revenue@1 DESC] ProjectionExec: expr=[r_regionkey@0 as r_regionkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: r_name@1 = AFRICA - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/region.parquet/part1.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = AFRICA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= AFRICA AND AFRICA <= r_name_max@1 END, required_guarantees=[r_name in (AFRICA)] + ParquetExec: 
file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet/part1.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = AFRICA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= AFRICA AND AFRICA <= r_name_max@1 END, required_guarantees=[r_name in (AFRICA)] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([n_regionkey@3], 4), input_partitions=4 ProjectionExec: expr=[l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, n_name@0 as n_name, n_regionkey@1 as n_regionkey] @@ -51,7 +51,7 @@ SortPreservingMergeExec: [revenue@1 DESC] HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[n_name@1, n_regionkey@2, l_extendedprice@3, l_discount@4] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=1 - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name, n_regionkey] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name, n_regionkey] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([s_nationkey@2], 4), input_partitions=4 ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@0 as s_nationkey] @@ -60,7 +60,7 @@ SortPreservingMergeExec: [revenue@1 DESC] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([s_suppkey@0, s_nationkey@1], 4), input_partitions=4 RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 - ParquetExec: file_groups={2 groups: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet/part1.parquet], [home/edmondo/Development/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, 
s_nationkey] + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([l_suppkey@1, c_nationkey@0], 4), input_partitions=4 CoalesceBatchesExec: target_batch_size=8192 @@ -71,16 +71,16 @@ SortPreservingMergeExec: [revenue@1 DESC] HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_nationkey@1, o_orderkey@2] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([c_custkey@0], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:0..3391370], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:3391370..6782740], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:6782740..6784675, home/edmondo/Development/datafusion-ray/data/customer.parquet/part2.parquet:0..3389435], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part2.parquet:3389435..6780803]]}, projection=[c_custkey, c_nationkey] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..3391370], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:3391370..6782740], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6782740..6784675, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..3389435], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:3389435..6780803]]}, projection=[c_custkey, c_nationkey] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([o_custkey@1], 4), input_partitions=4 ProjectionExec: 
expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: o_orderdate@2 >= 1994-01-01 AND o_orderdate@2 < 1995-01-01 - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1994-01-01 AND o_orderdate@4 < 1995-01-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1994-01-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1995-01-01 END, required_guarantees=[] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1994-01-01 AND o_orderdate@4 < 1995-01-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1994-01-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1995-01-01 END, required_guarantees=[] 
CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount] RaySQL Plan =========== @@ -90,26 +90,26 @@ ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "r_region ProjectionExec: expr=[r_regionkey@0 as r_regionkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: r_name@1 = AFRICA - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/region.parquet/part1.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = AFRICA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= AFRICA AND AFRICA <= r_name_max@1 END, required_guarantees=[r_name in (AFRICA)] + ParquetExec: file_groups={1 group: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet/part1.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = AFRICA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= AFRICA AND AFRICA <= r_name_max@1 END, required_guarantees=[r_name in (AFRICA)] Query Stage #1 (1 -> 4): ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name, n_regionkey] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name, n_regionkey] Query Stage #2 (2 -> 4): ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }, Column { name: "s_nationkey", index: 1 }], 4)) - ParquetExec: file_groups={2 groups: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet/part1.parquet], [home/edmondo/Development/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] Query Stage #3 (4 -> 4): ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:0..3391370], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:3391370..6782740], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:6782740..6784675, home/edmondo/Development/datafusion-ray/data/customer.parquet/part2.parquet:0..3389435], 
[home/edmondo/Development/datafusion-ray/data/customer.parquet/part2.parquet:3389435..6780803]]}, projection=[c_custkey, c_nationkey] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..3391370], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:3391370..6782740], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6782740..6784675, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..3389435], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:3389435..6780803]]}, projection=[c_custkey, c_nationkey] Query Stage #4 (4 -> 4): ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "o_custkey", index: 1 }], 4)) ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: o_orderdate@2 >= 1994-01-01 AND o_orderdate@2 < 1995-01-01 - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1994-01-01 AND o_orderdate@4 < 1995-01-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1994-01-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1995-01-01 END, required_guarantees=[] + ParquetExec: file_groups={4 groups: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1994-01-01 AND o_orderdate@4 < 1995-01-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1994-01-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1995-01-01 END, required_guarantees=[] Query Stage #5 (4 -> 4): ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "o_orderkey", index: 1 }], 4)) @@ -122,7 +122,7 @@ ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "o_orderk Query Stage #6 (4 -> 4): ShuffleWriterExec(stage_id=6, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], 
[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount] Query Stage #7 (4 -> 4): ShuffleWriterExec(stage_id=7, output_partitioning=Hash([Column { name: "l_suppkey", index: 1 }, Column { name: "c_nationkey", index: 0 }], 4)) diff --git a/testdata/expected-plans/q6.txt b/testdata/expected-plans/q6.txt index 0cd41ee..d04ef4f 100644 --- a/testdata/expected-plans/q6.txt +++ b/testdata/expected-plans/q6.txt @@ -17,7 +17,7 @@ ProjectionExec: expr=[sum(lineitem.l_extendedprice * lineitem.l_discount)@0 as r ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_shipdate@3 >= 1994-01-01 AND l_shipdate@3 < 1995-01-01 AND l_discount@2 >= Some(3),11,2 AND l_discount@2 <= Some(5),11,2 AND l_quantity@0 < Some(2400),11,2 - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_quantity, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1994-01-01 AND l_shipdate@10 < 1995-01-01 AND l_discount@6 >= Some(3),11,2 AND l_discount@6 <= Some(5),11,2 AND l_quantity@4 < Some(2400),11,2, pruning_predicate=CASE WHEN 
l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1994-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1995-01-01 END AND CASE WHEN l_discount_null_count@5 = l_discount_row_count@6 THEN false ELSE l_discount_max@4 >= Some(3),11,2 END AND CASE WHEN l_discount_null_count@5 = l_discount_row_count@6 THEN false ELSE l_discount_min@7 <= Some(5),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@8 < Some(2400),11,2 END, required_guarantees=[] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_quantity, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1994-01-01 AND l_shipdate@10 < 1995-01-01 AND l_discount@6 >= Some(3),11,2 AND l_discount@6 <= Some(5),11,2 AND l_quantity@4 < Some(2400),11,2, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1994-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1995-01-01 END AND CASE WHEN l_discount_null_count@5 = l_discount_row_count@6 THEN false ELSE l_discount_max@4 >= Some(3),11,2 END AND CASE WHEN l_discount_null_count@5 = l_discount_row_count@6 THEN false ELSE l_discount_min@7 <= Some(5),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@8 < Some(2400),11,2 END, required_guarantees=[] RaySQL Plan 
=========== @@ -28,7 +28,7 @@ ShuffleWriterExec(stage_id=0, output_partitioning=UnknownPartitioning(4)) ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_shipdate@3 >= 1994-01-01 AND l_shipdate@3 < 1995-01-01 AND l_discount@2 >= Some(3),11,2 AND l_discount@2 <= Some(5),11,2 AND l_quantity@0 < Some(2400),11,2 - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_quantity, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1994-01-01 AND l_shipdate@10 < 1995-01-01 AND l_discount@6 >= Some(3),11,2 AND l_discount@6 <= Some(5),11,2 AND l_quantity@4 < Some(2400),11,2, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1994-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1995-01-01 END AND CASE WHEN l_discount_null_count@5 = l_discount_row_count@6 THEN false ELSE l_discount_max@4 >= Some(3),11,2 END AND CASE WHEN l_discount_null_count@5 = l_discount_row_count@6 THEN false ELSE l_discount_min@7 <= Some(5),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@8 < Some(2400),11,2 END, required_guarantees=[] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], 
[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_quantity, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1994-01-01 AND l_shipdate@10 < 1995-01-01 AND l_discount@6 >= Some(3),11,2 AND l_discount@6 <= Some(5),11,2 AND l_quantity@4 < Some(2400),11,2, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1994-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1995-01-01 END AND CASE WHEN l_discount_null_count@5 = l_discount_row_count@6 THEN false ELSE l_discount_max@4 >= Some(3),11,2 END AND CASE WHEN l_discount_null_count@5 = l_discount_row_count@6 THEN false ELSE l_discount_min@7 <= Some(5),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@8 < Some(2400),11,2 END, required_guarantees=[] Query Stage #1 (1 -> 1): ProjectionExec: expr=[sum(lineitem.l_extendedprice * lineitem.l_discount)@0 as revenue] diff --git a/testdata/expected-plans/q7.txt b/testdata/expected-plans/q7.txt index 68c77e4..cc5671c 100644 --- a/testdata/expected-plans/q7.txt +++ b/testdata/expected-plans/q7.txt @@ -45,7 +45,7 @@ SortPreservingMergeExec: [supp_nation@0 ASC NULLS LAST,cust_nation@1 ASC NULLS L RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 CoalesceBatchesExec: target_batch_size=8192 FilterExec: n_name@1 = IRAQ OR n_name@1 = GERMANY - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = IRAQ OR 
n_name@1 = GERMANY, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= IRAQ AND IRAQ <= n_name_max@1 END OR CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= GERMANY AND GERMANY <= n_name_max@1 END, required_guarantees=[n_name in (GERMANY, IRAQ)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = IRAQ OR n_name@1 = GERMANY, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= IRAQ AND IRAQ <= n_name_max@1 END OR CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= GERMANY AND GERMANY <= n_name_max@1 END, required_guarantees=[n_name in (GERMANY, IRAQ)] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([c_nationkey@3], 4), input_partitions=4 ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, l_shipdate@3 as l_shipdate, c_nationkey@4 as c_nationkey, n_name@0 as n_name] @@ -56,7 +56,7 @@ SortPreservingMergeExec: [supp_nation@0 ASC NULLS LAST,cust_nation@1 ASC NULLS L RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 CoalesceBatchesExec: target_batch_size=8192 FilterExec: n_name@1 = GERMANY OR n_name@1 = IRAQ - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = GERMANY OR n_name@1 = IRAQ, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= GERMANY AND GERMANY <= n_name_max@1 END OR CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= IRAQ AND IRAQ <= n_name_max@1 END, required_guarantees=[n_name in (IRAQ, GERMANY)] + ParquetExec: file_groups={1 group: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = GERMANY OR n_name@1 = IRAQ, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= GERMANY AND GERMANY <= n_name_max@1 END OR CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= IRAQ AND IRAQ <= n_name_max@1 END, required_guarantees=[n_name in (IRAQ, GERMANY)] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([s_nationkey@0], 4), input_partitions=4 ProjectionExec: expr=[s_nationkey@1 as s_nationkey, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, l_shipdate@4 as l_shipdate, c_nationkey@0 as c_nationkey] @@ -64,7 +64,7 @@ SortPreservingMergeExec: [supp_nation@0 ASC NULLS LAST,cust_nation@1 ASC NULLS L HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@4)], projection=[c_nationkey@1, s_nationkey@2, l_extendedprice@3, l_discount@4, l_shipdate@5] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([c_custkey@0], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:0..3391370], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:3391370..6782740], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:6782740..6784675, home/edmondo/Development/datafusion-ray/data/customer.parquet/part2.parquet:0..3389435], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part2.parquet:3389435..6780803]]}, projection=[c_custkey, c_nationkey] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..3391370], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:3391370..6782740], 
[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6782740..6784675, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..3389435], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:3389435..6780803]]}, projection=[c_custkey, c_nationkey] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([o_custkey@4], 4), input_partitions=4 CoalesceBatchesExec: target_batch_size=8192 @@ -76,15 +76,15 @@ SortPreservingMergeExec: [supp_nation@0 ASC NULLS LAST,cust_nation@1 ASC NULLS L CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=4 RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 - ParquetExec: file_groups={2 groups: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet/part1.parquet], [home/edmondo/Development/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([l_suppkey@1], 4), input_partitions=4 CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_shipdate@4 >= 1995-01-01 AND l_shipdate@4 <= 1996-12-31 - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], 
[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1995-01-01 AND l_shipdate@10 <= 1996-12-31, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1995-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 <= 1996-12-31 END, required_guarantees=[] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1995-01-01 AND l_shipdate@10 <= 1996-12-31, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1995-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 <= 1996-12-31 END, required_guarantees=[] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([o_orderkey@0], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], 
[home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_custkey] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_custkey] RaySQL Plan =========== @@ -93,27 +93,27 @@ Query Stage #0 (1 -> 4): ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) CoalesceBatchesExec: target_batch_size=8192 FilterExec: n_name@1 = IRAQ OR n_name@1 = GERMANY - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = IRAQ OR n_name@1 = GERMANY, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= IRAQ AND IRAQ <= n_name_max@1 END OR CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= GERMANY AND GERMANY <= n_name_max@1 END, required_guarantees=[n_name in (GERMANY, IRAQ)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = IRAQ OR n_name@1 = GERMANY, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= IRAQ AND IRAQ <= n_name_max@1 END OR CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= GERMANY AND GERMANY <= n_name_max@1 END, required_guarantees=[n_name in (GERMANY, 
IRAQ)] Query Stage #1 (1 -> 4): ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) CoalesceBatchesExec: target_batch_size=8192 FilterExec: n_name@1 = GERMANY OR n_name@1 = IRAQ - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = GERMANY OR n_name@1 = IRAQ, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= GERMANY AND GERMANY <= n_name_max@1 END OR CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= IRAQ AND IRAQ <= n_name_max@1 END, required_guarantees=[n_name in (IRAQ, GERMANY)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = GERMANY OR n_name@1 = IRAQ, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= GERMANY AND GERMANY <= n_name_max@1 END OR CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= IRAQ AND IRAQ <= n_name_max@1 END, required_guarantees=[n_name in (IRAQ, GERMANY)] Query Stage #2 (4 -> 4): ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:0..3391370], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:3391370..6782740], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:6782740..6784675, home/edmondo/Development/datafusion-ray/data/customer.parquet/part2.parquet:0..3389435], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part2.parquet:3389435..6780803]]}, projection=[c_custkey, c_nationkey] + ParquetExec: file_groups={4 groups: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..3391370], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:3391370..6782740], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6782740..6784675, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..3389435], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:3389435..6780803]]}, projection=[c_custkey, c_nationkey] Query Stage #3 (2 -> 4): ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) - ParquetExec: file_groups={2 groups: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet/part1.parquet], [home/edmondo/Development/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] Query Stage #4 (4 -> 4): ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "l_suppkey", index: 1 }], 4)) CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_shipdate@4 >= 1995-01-01 AND l_shipdate@4 <= 1996-12-31 - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount, l_shipdate], 
predicate=l_shipdate@10 >= 1995-01-01 AND l_shipdate@10 <= 1996-12-31, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1995-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 <= 1996-12-31 END, required_guarantees=[] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1995-01-01 AND l_shipdate@10 <= 1996-12-31, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1995-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 <= 1996-12-31 END, required_guarantees=[] Query Stage #5 (4 -> 4): ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "l_orderkey", index: 1 }], 4)) @@ -126,7 +126,7 @@ ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "l_orderk Query Stage #6 (4 -> 4): ShuffleWriterExec(stage_id=6, output_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, 
home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_custkey] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_custkey] Query Stage #7 (4 -> 4): ShuffleWriterExec(stage_id=7, output_partitioning=Hash([Column { name: "o_custkey", index: 4 }], 4)) diff --git a/testdata/expected-plans/q8.txt b/testdata/expected-plans/q8.txt index cce7bbe..f1bcc00 100644 --- a/testdata/expected-plans/q8.txt +++ b/testdata/expected-plans/q8.txt @@ -54,7 +54,7 @@ SortPreservingMergeExec: [o_year@0 ASC NULLS LAST] ProjectionExec: expr=[r_regionkey@0 as r_regionkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: r_name@1 = MIDDLE EAST - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/region.parquet/part1.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = MIDDLE EAST, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= MIDDLE EAST AND MIDDLE EAST <= r_name_max@1 END, required_guarantees=[r_name in (MIDDLE EAST)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet/part1.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = MIDDLE EAST, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= MIDDLE EAST AND MIDDLE 
EAST <= r_name_max@1 END, required_guarantees=[r_name in (MIDDLE EAST)] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([n_regionkey@3], 4), input_partitions=4 ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, o_orderdate@3 as o_orderdate, n_regionkey@4 as n_regionkey, n_name@0 as n_name] @@ -62,7 +62,7 @@ SortPreservingMergeExec: [o_year@0 ASC NULLS LAST] HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[n_name@1, l_extendedprice@2, l_discount@3, o_orderdate@5, n_regionkey@6] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=1 - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([s_nationkey@2], 4), input_partitions=4 ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@3 as s_nationkey, o_orderdate@4 as o_orderdate, n_regionkey@0 as n_regionkey] @@ -70,7 +70,7 @@ SortPreservingMergeExec: [o_year@0 ASC NULLS LAST] HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, c_nationkey@4)], projection=[n_regionkey@1, l_extendedprice@2, l_discount@3, s_nationkey@4, o_orderdate@5] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=1 - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_regionkey] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, 
n_regionkey] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([c_nationkey@4], 4), input_partitions=4 ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@3 as s_nationkey, o_orderdate@4 as o_orderdate, c_nationkey@0 as c_nationkey] @@ -78,7 +78,7 @@ SortPreservingMergeExec: [o_year@0 ASC NULLS LAST] HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@3)], projection=[c_nationkey@1, l_extendedprice@2, l_discount@3, s_nationkey@4, o_orderdate@6] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([c_custkey@0], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:0..3391370], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:3391370..6782740], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:6782740..6784675, home/edmondo/Development/datafusion-ray/data/customer.parquet/part2.parquet:0..3389435], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part2.parquet:3389435..6780803]]}, projection=[c_custkey, c_nationkey] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..3391370], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:3391370..6782740], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6782740..6784675, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..3389435], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:3389435..6780803]]}, projection=[c_custkey, c_nationkey] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([o_custkey@3], 4), input_partitions=4 ProjectionExec: expr=[l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, 
s_nationkey@4 as s_nationkey, o_custkey@0 as o_custkey, o_orderdate@1 as o_orderdate] @@ -88,7 +88,7 @@ SortPreservingMergeExec: [o_year@0 ASC NULLS LAST] RepartitionExec: partitioning=Hash([o_orderkey@0], 4), input_partitions=4 CoalesceBatchesExec: target_batch_size=8192 FilterExec: o_orderdate@2 >= 1995-01-01 AND o_orderdate@2 <= 1996-12-31 - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1995-01-01 AND o_orderdate@4 <= 1996-12-31, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1995-01-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 <= 1996-12-31 END, required_guarantees=[] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1995-01-01 AND o_orderdate@4 <= 1996-12-31, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE 
o_orderdate_max@0 >= 1995-01-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 <= 1996-12-31 END, required_guarantees=[] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4 ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@0 as s_nationkey] @@ -97,7 +97,7 @@ SortPreservingMergeExec: [o_year@0 ASC NULLS LAST] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=4 RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 - ParquetExec: file_groups={2 groups: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet/part1.parquet], [home/edmondo/Development/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([l_suppkey@1], 4), input_partitions=4 CoalesceBatchesExec: target_batch_size=8192 @@ -108,10 +108,10 @@ SortPreservingMergeExec: [o_year@0 ASC NULLS LAST] CoalesceBatchesExec: target_batch_size=8192 FilterExec: p_type@1 = LARGE PLATED STEEL RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 - ParquetExec: file_groups={2 groups: [[home/edmondo/Development/datafusion-ray/data/part.parquet/part1.parquet], [home/edmondo/Development/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_type], predicate=p_type@4 = LARGE PLATED STEEL, pruning_predicate=CASE WHEN p_type_null_count@2 = p_type_row_count@3 THEN false ELSE p_type_min@0 <= LARGE PLATED STEEL AND LARGE PLATED STEEL <= p_type_max@1 END, 
required_guarantees=[p_type in (LARGE PLATED STEEL)] + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_type], predicate=p_type@4 = LARGE PLATED STEEL, pruning_predicate=CASE WHEN p_type_null_count@2 = p_type_row_count@3 THEN false ELSE p_type_min@0 <= LARGE PLATED STEEL AND LARGE PLATED STEEL <= p_type_max@1 END, required_guarantees=[p_type in (LARGE PLATED STEEL)] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([l_partkey@1], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_partkey, l_suppkey, l_extendedprice, l_discount] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_partkey, l_suppkey, l_extendedprice, l_discount] RaySQL Plan =========== @@ -121,40 +121,40 @@ ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "r_region 
ProjectionExec: expr=[r_regionkey@0 as r_regionkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: r_name@1 = MIDDLE EAST - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/region.parquet/part1.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = MIDDLE EAST, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= MIDDLE EAST AND MIDDLE EAST <= r_name_max@1 END, required_guarantees=[r_name in (MIDDLE EAST)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet/part1.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = MIDDLE EAST, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= MIDDLE EAST AND MIDDLE EAST <= r_name_max@1 END, required_guarantees=[r_name in (MIDDLE EAST)] Query Stage #1 (1 -> 4): ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name] Query Stage #2 (1 -> 4): ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_regionkey] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_regionkey] Query Stage #3 (4 -> 4): ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: 
[[home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:0..3391370], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:3391370..6782740], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part1.parquet:6782740..6784675, home/edmondo/Development/datafusion-ray/data/customer.parquet/part2.parquet:0..3389435], [home/edmondo/Development/datafusion-ray/data/customer.parquet/part2.parquet:3389435..6780803]]}, projection=[c_custkey, c_nationkey] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..3391370], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:3391370..6782740], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6782740..6784675, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..3389435], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:3389435..6780803]]}, projection=[c_custkey, c_nationkey] Query Stage #4 (4 -> 4): ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) CoalesceBatchesExec: target_batch_size=8192 FilterExec: o_orderdate@2 >= 1995-01-01 AND o_orderdate@2 <= 1996-12-31 - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1995-01-01 AND o_orderdate@4 <= 1996-12-31, pruning_predicate=CASE WHEN 
o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1995-01-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 <= 1996-12-31 END, required_guarantees=[] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1995-01-01 AND o_orderdate@4 <= 1996-12-31, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1995-01-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 <= 1996-12-31 END, required_guarantees=[] Query Stage #5 (2 -> 4): ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) - ParquetExec: file_groups={2 groups: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet/part1.parquet], [home/edmondo/Development/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] Query Stage #6 (2 -> 4): ShuffleWriterExec(stage_id=6, output_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) ProjectionExec: expr=[p_partkey@0 as p_partkey] CoalesceBatchesExec: 
target_batch_size=8192 FilterExec: p_type@1 = LARGE PLATED STEEL - ParquetExec: file_groups={2 groups: [[home/edmondo/Development/datafusion-ray/data/part.parquet/part1.parquet], [home/edmondo/Development/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_type], predicate=p_type@4 = LARGE PLATED STEEL, pruning_predicate=CASE WHEN p_type_null_count@2 = p_type_row_count@3 THEN false ELSE p_type_min@0 <= LARGE PLATED STEEL AND LARGE PLATED STEEL <= p_type_max@1 END, required_guarantees=[p_type in (LARGE PLATED STEEL)] + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_type], predicate=p_type@4 = LARGE PLATED STEEL, pruning_predicate=CASE WHEN p_type_null_count@2 = p_type_row_count@3 THEN false ELSE p_type_min@0 <= LARGE PLATED STEEL AND LARGE PLATED STEEL <= p_type_max@1 END, required_guarantees=[p_type in (LARGE PLATED STEEL)] Query Stage #7 (4 -> 4): ShuffleWriterExec(stage_id=7, output_partitioning=Hash([Column { name: "l_partkey", index: 1 }], 4)) - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_partkey, l_suppkey, l_extendedprice, l_discount] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, 
home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_partkey, l_suppkey, l_extendedprice, l_discount] Query Stage #8 (4 -> 4): ShuffleWriterExec(stage_id=8, output_partitioning=Hash([Column { name: "l_suppkey", index: 1 }], 4)) diff --git a/testdata/expected-plans/q9.txt b/testdata/expected-plans/q9.txt index e13b865..7537293 100644 --- a/testdata/expected-plans/q9.txt +++ b/testdata/expected-plans/q9.txt @@ -39,7 +39,7 @@ SortPreservingMergeExec: [nation@0 ASC NULLS LAST,o_year@1 DESC] HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@3)], projection=[n_name@1, l_quantity@2, l_extendedprice@3, l_discount@4, ps_supplycost@6, o_orderdate@7] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=1 - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([s_nationkey@3], 4), input_partitions=4 ProjectionExec: expr=[l_quantity@1 as l_quantity, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@4 as s_nationkey, ps_supplycost@5 as ps_supplycost, o_orderdate@0 as o_orderdate] @@ -47,7 +47,7 @@ SortPreservingMergeExec: [nation@0 ASC NULLS LAST,o_year@1 DESC] HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_orderdate@1, l_quantity@3, l_extendedprice@4, l_discount@5, s_nationkey@6, ps_supplycost@7] 
CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([o_orderkey@0], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_orderdate] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_orderdate] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4 ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_quantity@2 as l_quantity, l_extendedprice@3 as l_extendedprice, l_discount@4 as l_discount, s_nationkey@5 as s_nationkey, ps_supplycost@0 as ps_supplycost] @@ -55,7 +55,7 @@ SortPreservingMergeExec: [nation@0 ASC NULLS LAST,o_year@1 DESC] HashJoinExec: mode=Partitioned, join_type=Inner, on=[(ps_suppkey@1, l_suppkey@2), (ps_partkey@0, l_partkey@1)], projection=[ps_supplycost@2, l_orderkey@3, l_quantity@6, l_extendedprice@7, l_discount@8, s_nationkey@9] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([ps_suppkey@1, ps_partkey@0], 4), input_partitions=4 - 
ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10449977], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part1.parquet:10449977..20896603, home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:0..3351], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:3351..10453328], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:10453328..20903305]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10449977], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:10449977..20896603, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..3351], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:3351..10453328], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:10453328..20903305]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([l_suppkey@2, l_partkey@1], 4), input_partitions=4 ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_partkey@2 as l_partkey, l_suppkey@3 as l_suppkey, l_quantity@4 as l_quantity, l_extendedprice@5 as l_extendedprice, l_discount@6 as l_discount, s_nationkey@0 as s_nationkey] @@ -64,7 +64,7 @@ SortPreservingMergeExec: [nation@0 ASC NULLS LAST,o_year@1 DESC] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=4 RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 - ParquetExec: file_groups={2 groups: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet/part1.parquet], [home/edmondo/Development/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, 
s_nationkey] + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([l_suppkey@2], 4), input_partitions=4 CoalesceBatchesExec: target_batch_size=8192 @@ -75,40 +75,40 @@ SortPreservingMergeExec: [nation@0 ASC NULLS LAST,o_year@1 DESC] CoalesceBatchesExec: target_batch_size=8192 FilterExec: p_name@1 LIKE %moccasin% RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 - ParquetExec: file_groups={2 groups: [[home/edmondo/Development/datafusion-ray/data/part.parquet/part1.parquet], [home/edmondo/Development/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_name], predicate=p_name@1 LIKE %moccasin% + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_name], predicate=p_name@1 LIKE %moccasin% CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([l_partkey@1], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_partkey, l_suppkey, l_quantity, l_extendedprice, l_discount] + ParquetExec: file_groups={4 groups: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_partkey, l_suppkey, l_quantity, l_extendedprice, l_discount] RaySQL Plan =========== Query Stage #0 (1 -> 4): ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name] Query Stage #1 (4 -> 4): ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/edmondo/Development/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_orderdate] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], 
[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_orderdate] Query Stage #2 (4 -> 4): ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "ps_suppkey", index: 1 }, Column { name: "ps_partkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10449977], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part1.parquet:10449977..20896603, home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:0..3351], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:3351..10453328], [home/edmondo/Development/datafusion-ray/data/partsupp.parquet/part2.parquet:10453328..20903305]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10449977], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:10449977..20896603, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..3351], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:3351..10453328], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:10453328..20903305]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] Query Stage #3 (2 -> 4): ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) - ParquetExec: file_groups={2 groups: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet/part1.parquet], [home/edmondo/Development/datafusion-ray/data/supplier.parquet/part2.parquet]]}, 
projection=[s_suppkey, s_nationkey] + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] Query Stage #4 (2 -> 4): ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) ProjectionExec: expr=[p_partkey@0 as p_partkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: p_name@1 LIKE %moccasin% - ParquetExec: file_groups={2 groups: [[home/edmondo/Development/datafusion-ray/data/part.parquet/part1.parquet], [home/edmondo/Development/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_name], predicate=p_name@1 LIKE %moccasin% + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_name], predicate=p_name@1 LIKE %moccasin% Query Stage #5 (4 -> 4): ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "l_partkey", index: 1 }], 4)) - ParquetExec: file_groups={4 groups: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/edmondo/Development/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_partkey, l_suppkey, l_quantity, l_extendedprice, l_discount] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], 
[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_partkey, l_suppkey, l_quantity, l_extendedprice, l_discount] Query Stage #6 (4 -> 4): ShuffleWriterExec(stage_id=6, output_partitioning=Hash([Column { name: "l_suppkey", index: 2 }], 4)) From 2bfbdda050530e2563a2cde19a029beb394fcd2b Mon Sep 17 00:00:00 2001 From: Edmondo Porcu Date: Thu, 3 Oct 2024 14:38:38 -0400 Subject: [PATCH 03/14] Reverting to single partition --- .github/workflows/rust.yml | 2 +- Cargo.lock | 24 +++++++++ testdata/expected-plans/q1.txt | 4 +- testdata/expected-plans/q10.txt | 20 +++---- testdata/expected-plans/q11.txt | 38 +++++++------ testdata/expected-plans/q12.txt | 8 +-- testdata/expected-plans/q13.txt | 8 +-- testdata/expected-plans/q14.txt | 11 ++-- testdata/expected-plans/q16.txt | 18 +++---- testdata/expected-plans/q17.txt | 15 +++--- testdata/expected-plans/q18.txt | 16 +++--- testdata/expected-plans/q19.txt | 11 ++-- testdata/expected-plans/q2.txt | 61 ++++++++++----------- testdata/expected-plans/q20.txt | 30 +++++------ testdata/expected-plans/q21.txt | 31 ++++++----- testdata/expected-plans/q22.txt | 12 ++--- testdata/expected-plans/q3.txt | 12 ++--- testdata/expected-plans/q4.txt | 8 +-- testdata/expected-plans/q5.txt | 35 ++++++------ testdata/expected-plans/q6.txt | 4 +- testdata/expected-plans/q7.txt | 95 +++++++++++++++++---------------- testdata/expected-plans/q8.txt | 50 +++++++++-------- testdata/expected-plans/q9.txt | 34 ++++++------ 23 files changed, 277 insertions(+), 270 deletions(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 7706511..60c2bb8 100644 --- 
a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -8,7 +8,7 @@ env: CARGO_TERM_COLOR: always PYTHON_VERSION: 3.9 TPCH_SAMPLING_RATE: "1" # (1/100) - TPCH_TEST_PARTITIONS: "2" + TPCH_TEST_PARTITIONS: "1" TPCH_DATA_PATH: "data" jobs: diff --git a/Cargo.lock b/Cargo.lock index aa6cdff..ae66c8d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1115,12 +1115,14 @@ dependencies = [ name = "datafusion_ray" version = "0.6.0" dependencies = [ + "anyhow", "datafusion", "datafusion-proto", "datafusion-python", "futures", "glob", "log", + "pretty_assertions", "prost 0.12.6", "prost-types 0.12.6", "pyo3", @@ -1130,6 +1132,12 @@ dependencies = [ "uuid", ] +[[package]] +name = "diff" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8" + [[package]] name = "digest" version = "0.10.7" @@ -2190,6 +2198,16 @@ dependencies = [ "zerocopy", ] +[[package]] +name = "pretty_assertions" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ae130e2f271fbc2ac3a40fb1d07180839cdbbe443c7a27e1e3c13c5cac0116d" +dependencies = [ + "diff", + "yansi", +] + [[package]] name = "prettyplease" version = "0.1.25" @@ -3550,6 +3568,12 @@ dependencies = [ "lzma-sys", ] +[[package]] +name = "yansi" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" + [[package]] name = "zerocopy" version = "0.7.35" diff --git a/testdata/expected-plans/q1.txt b/testdata/expected-plans/q1.txt index 111c15f..27bea61 100644 --- a/testdata/expected-plans/q1.txt +++ b/testdata/expected-plans/q1.txt @@ -21,7 +21,7 @@ SortPreservingMergeExec: [l_returnflag@0 ASC NULLS LAST,l_linestatus@1 ASC NULLS ProjectionExec: expr=[l_extendedprice@1 * (Some(1),20,0 - l_discount@2) as __common_expr_2, l_quantity@0 as l_quantity, l_extendedprice@1 as l_extendedprice, 
l_discount@2 as l_discount, l_tax@3 as l_tax, l_returnflag@4 as l_returnflag, l_linestatus@5 as l_linestatus] CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_shipdate@6 <= 1998-09-24 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate], predicate=l_shipdate@10 <= 1998-09-24, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@0 <= 1998-09-24 END, required_guarantees=[] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, 
home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate], predicate=l_shipdate@10 <= 1998-09-24, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@0 <= 1998-09-24 END, required_guarantees=[] RaySQL Plan =========== @@ -32,7 +32,7 @@ ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "l_return ProjectionExec: expr=[l_extendedprice@1 * (Some(1),20,0 - l_discount@2) as __common_expr_2, l_quantity@0 as l_quantity, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, l_tax@3 as l_tax, l_returnflag@4 as l_returnflag, l_linestatus@5 as l_linestatus] CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_shipdate@6 <= 1998-09-24 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate], predicate=l_shipdate@10 <= 1998-09-24, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@0 <= 1998-09-24 END, required_guarantees=[] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], 
[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate], predicate=l_shipdate@10 <= 1998-09-24, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@0 <= 1998-09-24 END, required_guarantees=[] Query Stage #1 (4 -> 4): ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "l_returnflag", index: 0 }, Column { name: "l_linestatus", index: 1 }], 4)) diff --git a/testdata/expected-plans/q10.txt b/testdata/expected-plans/q10.txt index c87cc6d..1d743e9 100644 --- a/testdata/expected-plans/q10.txt +++ b/testdata/expected-plans/q10.txt @@ -35,8 +35,8 @@ GlobalLimitExec: skip=0, fetch=20 CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, c_nationkey@3)], projection=[n_name@1, c_custkey@2, c_name@3, c_address@4, c_phone@6, c_acctbal@7, c_comment@8, l_extendedprice@9, l_discount@10] CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=1 - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name] + 
RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=2 + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/IftthSxZMytggJzA_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([c_nationkey@3], 4), input_partitions=4 CoalesceBatchesExec: target_batch_size=8192 @@ -47,37 +47,37 @@ GlobalLimitExec: skip=0, fetch=20 HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, c_name@1, c_address@2, c_nationkey@3, c_phone@4, c_acctbal@5, c_comment@6, o_orderkey@7] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([c_custkey@0], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..3391370], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:3391370..6782740], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6782740..6784675, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..3389435], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:3389435..6780803]]}, projection=[c_custkey, c_name, c_address, c_nationkey, c_phone, c_acctbal, c_comment] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_0.parquet:0..3472684, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_1.parquet:0..3399474], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_1.parquet:3399474..3842490, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_2.parquet:0..3498085, 
home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_3.parquet:0..2931057], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_3.parquet:2931057..3110748, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..6692467], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6692467..6784093, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..6780531]]}, projection=[c_custkey, c_name, c_address, c_nationkey, c_phone, c_acctbal, c_comment] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([o_custkey@1], 4), input_partitions=4 ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: o_orderdate@2 >= 1993-07-01 AND o_orderdate@2 < 1993-10-01 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1993-07-01 AND o_orderdate@4 < 1993-10-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1993-07-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1993-10-01 END, required_guarantees=[] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_0.parquet:0..13941205, 
home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:0..13831262], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:13831262..13895674, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_2.parquet:0..14344446, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:0..13363609], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:13363609..14369581, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..26766495], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:26766495..27275418, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..27263542]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1993-07-01 AND o_orderdate@4 < 1993-10-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1993-07-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1993-10-01 END, required_guarantees=[] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4 ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_returnflag@3 = R - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], 
[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_extendedprice, l_discount, l_returnflag], predicate=l_returnflag@8 = R, pruning_predicate=CASE WHEN l_returnflag_null_count@2 = l_returnflag_row_count@3 THEN false ELSE l_returnflag_min@0 <= R AND R <= l_returnflag_max@1 END, required_guarantees=[l_returnflag in (R)] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_orderkey, l_extendedprice, l_discount, l_returnflag], predicate=l_returnflag@8 = R, pruning_predicate=CASE WHEN l_returnflag_null_count@2 = l_returnflag_row_count@3 THEN false ELSE l_returnflag_min@0 <= R AND R <= l_returnflag_max@1 END, required_guarantees=[l_returnflag in (R)] RaySQL Plan =========== -Query Stage #0 (1 -> 4): +Query Stage #0 (2 -> 4): ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) - ParquetExec: file_groups={1 group: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name] + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/IftthSxZMytggJzA_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name] Query Stage #1 (4 -> 4): ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..3391370], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:3391370..6782740], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6782740..6784675, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..3389435], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:3389435..6780803]]}, projection=[c_custkey, c_name, c_address, c_nationkey, c_phone, c_acctbal, c_comment] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_0.parquet:0..3472684, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_1.parquet:0..3399474], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_1.parquet:3399474..3842490, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_2.parquet:0..3498085, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_3.parquet:0..2931057], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_3.parquet:2931057..3110748, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..6692467], 
[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6692467..6784093, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..6780531]]}, projection=[c_custkey, c_name, c_address, c_nationkey, c_phone, c_acctbal, c_comment] Query Stage #2 (4 -> 4): ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "o_custkey", index: 1 }], 4)) ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: o_orderdate@2 >= 1993-07-01 AND o_orderdate@2 < 1993-10-01 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1993-07-01 AND o_orderdate@4 < 1993-10-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1993-07-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1993-10-01 END, required_guarantees=[] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_0.parquet:0..13941205, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:0..13831262], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:13831262..13895674, 
home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_2.parquet:0..14344446, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:0..13363609], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:13363609..14369581, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..26766495], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:26766495..27275418, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..27263542]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1993-07-01 AND o_orderdate@4 < 1993-10-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1993-07-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1993-10-01 END, required_guarantees=[] Query Stage #3 (4 -> 4): ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "o_orderkey", index: 7 }], 4)) @@ -93,7 +93,7 @@ ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "l_orderk ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_returnflag@3 = R - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, 
projection=[l_orderkey, l_extendedprice, l_discount, l_returnflag], predicate=l_returnflag@8 = R, pruning_predicate=CASE WHEN l_returnflag_null_count@2 = l_returnflag_row_count@3 THEN false ELSE l_returnflag_min@0 <= R AND R <= l_returnflag_max@1 END, required_guarantees=[l_returnflag in (R)] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_orderkey, l_extendedprice, l_discount, l_returnflag], predicate=l_returnflag@8 = R, pruning_predicate=CASE WHEN l_returnflag_null_count@2 = l_returnflag_row_count@3 THEN false ELSE l_returnflag_min@0 <= R AND R <= l_returnflag_max@1 END, required_guarantees=[l_returnflag in (R)] Query Stage #5 (4 -> 4): ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "c_nationkey", index: 3 }], 4)) diff --git a/testdata/expected-plans/q11.txt b/testdata/expected-plans/q11.txt index 84a0680..8b1556b 100644 --- a/testdata/expected-plans/q11.txt +++ b/testdata/expected-plans/q11.txt @@ -42,11 +42,11 @@ SortPreservingMergeExec: [value@1 DESC] HashJoinExec: 
mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[ps_availqty@1, ps_supplycost@2] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=4 - RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 ProjectionExec: expr=[n_nationkey@0 as n_nationkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: n_name@1 = ALGERIA - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ALGERIA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ALGERIA AND ALGERIA <= n_name_max@1 END, required_guarantees=[n_name in (ALGERIA)] + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/IftthSxZMytggJzA_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ALGERIA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ALGERIA AND ALGERIA <= n_name_max@1 END, required_guarantees=[n_name in (ALGERIA)] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([s_nationkey@2], 4), input_partitions=4 ProjectionExec: expr=[ps_availqty@1 as ps_availqty, ps_supplycost@2 as ps_supplycost, s_nationkey@0 as s_nationkey] @@ -54,11 +54,10 @@ SortPreservingMergeExec: [value@1 DESC] HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@0)], projection=[s_nationkey@1, ps_availqty@3, ps_supplycost@4] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=4 - RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 - ParquetExec: 
file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([ps_suppkey@0], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10449977], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:10449977..20896603, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..3351], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:3351..10453328], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:10453328..20903305]]}, projection=[ps_suppkey, ps_availqty, ps_supplycost] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_0.parquet:0..11328263, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_1.parquet:0..9740046], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_1.parquet:9740046..10585781, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_2.parquet:0..9946761, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_3.parquet:0..10275813], 
[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_3.parquet:10275813..10620507, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..20723615], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:20723615..20895812, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..20896109]]}, projection=[ps_suppkey, ps_availqty, ps_supplycost] AggregateExec: mode=FinalPartitioned, gby=[ps_partkey@0 as ps_partkey], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([ps_partkey@0], 4), input_partitions=4 @@ -67,11 +66,11 @@ SortPreservingMergeExec: [value@1 DESC] HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@3)], projection=[ps_partkey@1, ps_availqty@2, ps_supplycost@3] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=4 - RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 ProjectionExec: expr=[n_nationkey@0 as n_nationkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: n_name@1 = ALGERIA - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ALGERIA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ALGERIA AND ALGERIA <= n_name_max@1 END, required_guarantees=[n_name in (ALGERIA)] + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/IftthSxZMytggJzA_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ALGERIA, pruning_predicate=CASE WHEN 
n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ALGERIA AND ALGERIA <= n_name_max@1 END, required_guarantees=[n_name in (ALGERIA)] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([s_nationkey@3], 4), input_partitions=4 ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_availqty@2 as ps_availqty, ps_supplycost@3 as ps_supplycost, s_nationkey@0 as s_nationkey] @@ -79,29 +78,28 @@ SortPreservingMergeExec: [value@1 DESC] HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@1)], projection=[s_nationkey@1, ps_partkey@2, ps_availqty@4, ps_supplycost@5] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=4 - RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([ps_suppkey@1], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10449977], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:10449977..20896603, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..3351], 
[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:3351..10453328], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:10453328..20903305]]}, projection=[ps_partkey, ps_suppkey, ps_availqty, ps_supplycost] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_0.parquet:0..11328263, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_1.parquet:0..9740046], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_1.parquet:9740046..10585781, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_2.parquet:0..9946761, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_3.parquet:0..10275813], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_3.parquet:10275813..10620507, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..20723615], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:20723615..20895812, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..20896109]]}, projection=[ps_partkey, ps_suppkey, ps_availqty, ps_supplycost] RaySQL Plan =========== -Query Stage #0 (1 -> 4): +Query Stage #0 (2 -> 4): ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) ProjectionExec: expr=[n_nationkey@0 as n_nationkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: n_name@1 = ALGERIA - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ALGERIA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ALGERIA AND ALGERIA <= n_name_max@1 END, 
required_guarantees=[n_name in (ALGERIA)] + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/IftthSxZMytggJzA_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ALGERIA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ALGERIA AND ALGERIA <= n_name_max@1 END, required_guarantees=[n_name in (ALGERIA)] -Query Stage #1 (2 -> 4): +Query Stage #1 (4 -> 4): ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] Query Stage #2 (4 -> 4): ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "ps_suppkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10449977], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:10449977..20896603, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..3351], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:3351..10453328], 
[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:10453328..20903305]]}, projection=[ps_suppkey, ps_availqty, ps_supplycost] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_0.parquet:0..11328263, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_1.parquet:0..9740046], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_1.parquet:9740046..10585781, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_2.parquet:0..9946761, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_3.parquet:0..10275813], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_3.parquet:10275813..10620507, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..20723615], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:20723615..20895812, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..20896109]]}, projection=[ps_suppkey, ps_availqty, ps_supplycost] Query Stage #3 (4 -> 4): ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "s_nationkey", index: 2 }], 4)) @@ -123,20 +121,20 @@ ShuffleWriterExec(stage_id=4, output_partitioning=Hash([], 4)) CoalesceBatchesExec: target_batch_size=8192 ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "s_nationkey", index: 2 }], 4)) -Query Stage #5 (1 -> 4): +Query Stage #5 (2 -> 4): ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) ProjectionExec: expr=[n_nationkey@0 as n_nationkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: n_name@1 = ALGERIA - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, 
projection=[n_nationkey, n_name], predicate=n_name@1 = ALGERIA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ALGERIA AND ALGERIA <= n_name_max@1 END, required_guarantees=[n_name in (ALGERIA)] + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/IftthSxZMytggJzA_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ALGERIA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ALGERIA AND ALGERIA <= n_name_max@1 END, required_guarantees=[n_name in (ALGERIA)] -Query Stage #6 (2 -> 4): +Query Stage #6 (4 -> 4): ShuffleWriterExec(stage_id=6, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] Query Stage #7 (4 -> 4): ShuffleWriterExec(stage_id=7, output_partitioning=Hash([Column { name: "ps_suppkey", index: 1 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10449977], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:10449977..20896603, 
home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..3351], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:3351..10453328], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:10453328..20903305]]}, projection=[ps_partkey, ps_suppkey, ps_availqty, ps_supplycost] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_0.parquet:0..11328263, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_1.parquet:0..9740046], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_1.parquet:9740046..10585781, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_2.parquet:0..9946761, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_3.parquet:0..10275813], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_3.parquet:10275813..10620507, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..20723615], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:20723615..20895812, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..20896109]]}, projection=[ps_partkey, ps_suppkey, ps_availqty, ps_supplycost] Query Stage #8 (4 -> 4): ShuffleWriterExec(stage_id=8, output_partitioning=Hash([Column { name: "s_nationkey", index: 3 }], 4)) diff --git a/testdata/expected-plans/q12.txt b/testdata/expected-plans/q12.txt index 43f07ed..ba752e4 100644 --- a/testdata/expected-plans/q12.txt +++ b/testdata/expected-plans/q12.txt @@ -29,10 +29,10 @@ SortPreservingMergeExec: [l_shipmode@0 ASC NULLS LAST] ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_shipmode@4 as l_shipmode] CoalesceBatchesExec: target_batch_size=8192 FilterExec: (l_shipmode@4 = FOB OR l_shipmode@4 = 
SHIP) AND l_receiptdate@3 > l_commitdate@2 AND l_shipdate@1 < l_commitdate@2 AND l_receiptdate@3 >= 1995-01-01 AND l_receiptdate@3 < 1996-01-01 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_shipdate, l_commitdate, l_receiptdate, l_shipmode], predicate=(l_shipmode@14 = FOB OR l_shipmode@14 = SHIP) AND l_receiptdate@12 > l_commitdate@11 AND l_shipdate@10 < l_commitdate@11 AND l_receiptdate@12 >= 1995-01-01 AND l_receiptdate@12 < 1996-01-01, pruning_predicate=(CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= FOB AND FOB <= l_shipmode_max@1 END OR CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= SHIP AND SHIP <= l_shipmode_max@1 END) AND CASE WHEN l_receiptdate_null_count@5 = l_receiptdate_row_count@6 THEN false ELSE l_receiptdate_max@4 >= 1995-01-01 END AND CASE WHEN l_receiptdate_null_count@5 = l_receiptdate_row_count@6 THEN false ELSE l_receiptdate_min@7 < 1996-01-01 END, required_guarantees=[l_shipmode in (SHIP, FOB)] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, 
home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_orderkey, l_shipdate, l_commitdate, l_receiptdate, l_shipmode], predicate=(l_shipmode@14 = FOB OR l_shipmode@14 = SHIP) AND l_receiptdate@12 > l_commitdate@11 AND l_shipdate@10 < l_commitdate@11 AND l_receiptdate@12 >= 1995-01-01 AND l_receiptdate@12 < 1996-01-01, pruning_predicate=(CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= FOB AND FOB <= l_shipmode_max@1 END OR CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= SHIP AND SHIP <= l_shipmode_max@1 END) AND CASE WHEN l_receiptdate_null_count@5 = l_receiptdate_row_count@6 THEN false ELSE l_receiptdate_max@4 >= 1995-01-01 END AND CASE WHEN l_receiptdate_null_count@5 = l_receiptdate_row_count@6 THEN false ELSE l_receiptdate_min@7 < 1996-01-01 END, required_guarantees=[l_shipmode in (FOB, SHIP)] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([o_orderkey@0], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, 
home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_orderpriority] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_0.parquet:0..13941205, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:0..13831262], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:13831262..13895674, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_2.parquet:0..14344446, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:0..13363609], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:13363609..14369581, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..26766495], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:26766495..27275418, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..27263542]]}, projection=[o_orderkey, o_orderpriority] RaySQL Plan =========== @@ -42,11 +42,11 @@ ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "l_orderk ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_shipmode@4 as l_shipmode] CoalesceBatchesExec: target_batch_size=8192 FilterExec: (l_shipmode@4 = FOB OR l_shipmode@4 = SHIP) AND l_receiptdate@3 > l_commitdate@2 AND l_shipdate@1 < l_commitdate@2 AND l_receiptdate@3 >= 1995-01-01 AND l_receiptdate@3 < 1996-01-01 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, 
home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_shipdate, l_commitdate, l_receiptdate, l_shipmode], predicate=(l_shipmode@14 = FOB OR l_shipmode@14 = SHIP) AND l_receiptdate@12 > l_commitdate@11 AND l_shipdate@10 < l_commitdate@11 AND l_receiptdate@12 >= 1995-01-01 AND l_receiptdate@12 < 1996-01-01, pruning_predicate=(CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= FOB AND FOB <= l_shipmode_max@1 END OR CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= SHIP AND SHIP <= l_shipmode_max@1 END) AND CASE WHEN l_receiptdate_null_count@5 = l_receiptdate_row_count@6 THEN false ELSE l_receiptdate_max@4 >= 1995-01-01 END AND CASE WHEN l_receiptdate_null_count@5 = l_receiptdate_row_count@6 THEN false ELSE l_receiptdate_min@7 < 1996-01-01 END, required_guarantees=[l_shipmode in (SHIP, FOB)] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], 
[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_orderkey, l_shipdate, l_commitdate, l_receiptdate, l_shipmode], predicate=(l_shipmode@14 = FOB OR l_shipmode@14 = SHIP) AND l_receiptdate@12 > l_commitdate@11 AND l_shipdate@10 < l_commitdate@11 AND l_receiptdate@12 >= 1995-01-01 AND l_receiptdate@12 < 1996-01-01, pruning_predicate=(CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= FOB AND FOB <= l_shipmode_max@1 END OR CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= SHIP AND SHIP <= l_shipmode_max@1 END) AND CASE WHEN l_receiptdate_null_count@5 = l_receiptdate_row_count@6 THEN false ELSE l_receiptdate_max@4 >= 1995-01-01 END AND CASE WHEN l_receiptdate_null_count@5 = l_receiptdate_row_count@6 THEN false ELSE l_receiptdate_min@7 < 1996-01-01 END, required_guarantees=[l_shipmode in (FOB, SHIP)] Query Stage #1 (4 -> 4): ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_orderpriority] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_0.parquet:0..13941205, 
home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:0..13831262], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:13831262..13895674, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_2.parquet:0..14344446, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:0..13363609], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:13363609..14369581, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..26766495], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:26766495..27275418, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..27263542]]}, projection=[o_orderkey, o_orderpriority] Query Stage #2 (4 -> 4): ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "l_shipmode", index: 0 }], 4)) diff --git a/testdata/expected-plans/q13.txt b/testdata/expected-plans/q13.txt index cb2911a..08e307a 100644 --- a/testdata/expected-plans/q13.txt +++ b/testdata/expected-plans/q13.txt @@ -30,27 +30,27 @@ SortPreservingMergeExec: [custdist@1 DESC,c_count@0 DESC] HashJoinExec: mode=Partitioned, join_type=Left, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, o_orderkey@1] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([c_custkey@0], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..3391370], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:3391370..6782740], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6782740..6784675, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..3389435], 
[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:3389435..6780803]]}, projection=[c_custkey] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_0.parquet:0..3472684, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_1.parquet:0..3399474], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_1.parquet:3399474..3842490, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_2.parquet:0..3498085, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_3.parquet:0..2931057], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_3.parquet:2931057..3110748, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..6692467], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6692467..6784093, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..6780531]]}, projection=[c_custkey] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([o_custkey@1], 4), input_partitions=4 ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: o_comment@2 NOT LIKE %express%requests% - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, 
projection=[o_orderkey, o_custkey, o_comment], predicate=o_comment@8 NOT LIKE %express%requests% + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_0.parquet:0..13941205, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:0..13831262], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:13831262..13895674, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_2.parquet:0..14344446, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:0..13363609], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:13363609..14369581, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..26766495], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:26766495..27275418, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..27263542]]}, projection=[o_orderkey, o_custkey, o_comment], predicate=o_comment@8 NOT LIKE %express%requests% RaySQL Plan =========== Query Stage #0 (4 -> 4): ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..3391370], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:3391370..6782740], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6782740..6784675, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..3389435], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:3389435..6780803]]}, projection=[c_custkey] + ParquetExec: file_groups={4 groups: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_0.parquet:0..3472684, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_1.parquet:0..3399474], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_1.parquet:3399474..3842490, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_2.parquet:0..3498085, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_3.parquet:0..2931057], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_3.parquet:2931057..3110748, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..6692467], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6692467..6784093, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..6780531]]}, projection=[c_custkey] Query Stage #1 (4 -> 4): ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "o_custkey", index: 1 }], 4)) ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: o_comment@2 NOT LIKE %express%requests% - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_custkey, o_comment], predicate=o_comment@8 NOT LIKE %express%requests% + ParquetExec: file_groups={4 groups: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_0.parquet:0..13941205, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:0..13831262], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:13831262..13895674, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_2.parquet:0..14344446, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:0..13363609], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:13363609..14369581, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..26766495], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:26766495..27275418, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..27263542]]}, projection=[o_orderkey, o_custkey, o_comment], predicate=o_comment@8 NOT LIKE %express%requests% Query Stage #2 (4 -> 4): ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "c_count", index: 0 }], 4)) diff --git a/testdata/expected-plans/q14.txt b/testdata/expected-plans/q14.txt index fa366f3..e8eb357 100644 --- a/testdata/expected-plans/q14.txt +++ b/testdata/expected-plans/q14.txt @@ -22,28 +22,27 @@ ProjectionExec: expr=[100 * CAST(sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") T HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, l_partkey@0)], projection=[p_type@1, l_extendedprice@3, l_discount@4] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([p_partkey@0], 4), input_partitions=4 - RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet]]}, 
projection=[p_partkey, p_type] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet:0..3454243, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet:0..61547], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet:61547..3452413, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_0.parquet:0..124924], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_0.parquet:124924..2056321, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_1.parquet:0..1526046, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_2.parquet:0..58347], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_2.parquet:58347..1788444, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_3.parquet:0..1785690]]}, projection=[p_partkey, p_type] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([l_partkey@0], 4), input_partitions=4 ProjectionExec: expr=[l_partkey@0 as l_partkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_shipdate@3 >= 1995-02-01 AND l_shipdate@3 < 1995-03-01 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_partkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 
1995-02-01 AND l_shipdate@10 < 1995-03-01, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1995-02-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1995-03-01 END, required_guarantees=[] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_partkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1995-02-01 AND l_shipdate@10 < 1995-03-01, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1995-02-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1995-03-01 END, required_guarantees=[] RaySQL Plan =========== -Query Stage #0 (2 -> 4): +Query Stage #0 (4 -> 4): ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) - ParquetExec: file_groups={2 groups: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_type] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet:0..3454243, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet:0..61547], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet:61547..3452413, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_0.parquet:0..124924], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_0.parquet:124924..2056321, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_1.parquet:0..1526046, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_2.parquet:0..58347], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_2.parquet:58347..1788444, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_3.parquet:0..1785690]]}, projection=[p_partkey, p_type] Query Stage #1 (4 -> 4): ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "l_partkey", index: 0 }], 4)) ProjectionExec: expr=[l_partkey@0 as l_partkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_shipdate@3 >= 1995-02-01 AND l_shipdate@3 < 1995-03-01 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], 
[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_partkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1995-02-01 AND l_shipdate@10 < 1995-03-01, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1995-02-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1995-03-01 END, required_guarantees=[] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_partkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1995-02-01 AND l_shipdate@10 < 1995-03-01, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1995-02-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1995-03-01 END, required_guarantees=[] Query Stage #2 (4 -> 1): ShuffleWriterExec(stage_id=2, 
output_partitioning=Hash([], 4)) diff --git a/testdata/expected-plans/q16.txt b/testdata/expected-plans/q16.txt index b72c9f1..0738c4b 100644 --- a/testdata/expected-plans/q16.txt +++ b/testdata/expected-plans/q16.txt @@ -37,8 +37,7 @@ SortPreservingMergeExec: [supplier_cnt@3 DESC,p_brand@0 ASC NULLS LAST,p_type@1 ProjectionExec: expr=[s_suppkey@0 as s_suppkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: s_comment@1 LIKE %Customer%Complaints% - RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_comment], predicate=s_comment@6 LIKE %Customer%Complaints% + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_comment], predicate=s_comment@6 LIKE %Customer%Complaints% CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([ps_suppkey@0], 4), input_partitions=4 ProjectionExec: expr=[ps_suppkey@3 as ps_suppkey, p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size] @@ -48,31 +47,30 @@ SortPreservingMergeExec: [supplier_cnt@3 DESC,p_brand@0 ASC NULLS LAST,p_type@1 RepartitionExec: partitioning=Hash([p_partkey@0], 4), input_partitions=4 CoalesceBatchesExec: target_batch_size=8192 FilterExec: p_brand@1 != Brand#14 AND p_type@2 NOT LIKE SMALL PLATED% AND Use p_size@3 IN (SET) ([Literal { value: Int32(14) }, Literal { value: Int32(6) }, Literal { value: Int32(5) }, Literal { value: Int32(31) }, Literal { 
value: Int32(49) }, Literal { value: Int32(15) }, Literal { value: Int32(41) }, Literal { value: Int32(47) }]) - RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_brand, p_type, p_size], predicate=p_brand@3 != Brand#14 AND p_type@4 NOT LIKE SMALL PLATED% AND Use p_size@5 IN (SET) ([Literal { value: Int32(14) }, Literal { value: Int32(6) }, Literal { value: Int32(5) }, Literal { value: Int32(31) }, Literal { value: Int32(49) }, Literal { value: Int32(15) }, Literal { value: Int32(41) }, Literal { value: Int32(47) }]), pruning_predicate=CASE WHEN p_brand_null_count@2 = p_brand_row_count@3 THEN false ELSE p_brand_min@0 != Brand#14 OR Brand#14 != p_brand_max@1 END AND (CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 14 AND 14 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 6 AND 6 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 5 AND 5 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 31 AND 31 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 49 AND 49 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 15 AND 15 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 41 AND 41 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 47 AND 47 <= p_size_max@5 END), required_guarantees=[p_brand not in (Brand#14), p_size in (41, 14, 49, 6, 31, 15, 5, 47)] + ParquetExec: file_groups={4 groups: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet:0..3454243, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet:0..61547], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet:61547..3452413, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_0.parquet:0..124924], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_0.parquet:124924..2056321, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_1.parquet:0..1526046, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_2.parquet:0..58347], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_2.parquet:58347..1788444, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_3.parquet:0..1785690]]}, projection=[p_partkey, p_brand, p_type, p_size], predicate=p_brand@3 != Brand#14 AND p_type@4 NOT LIKE SMALL PLATED% AND Use p_size@5 IN (SET) ([Literal { value: Int32(14) }, Literal { value: Int32(6) }, Literal { value: Int32(5) }, Literal { value: Int32(31) }, Literal { value: Int32(49) }, Literal { value: Int32(15) }, Literal { value: Int32(41) }, Literal { value: Int32(47) }]), pruning_predicate=CASE WHEN p_brand_null_count@2 = p_brand_row_count@3 THEN false ELSE p_brand_min@0 != Brand#14 OR Brand#14 != p_brand_max@1 END AND (CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 14 AND 14 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 6 AND 6 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 5 AND 5 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 31 AND 31 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 49 
AND 49 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 15 AND 15 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 41 AND 41 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 47 AND 47 <= p_size_max@5 END), required_guarantees=[p_brand not in (Brand#14), p_size in (6, 47, 5, 14, 15, 41, 49, 31)] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([ps_partkey@0], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10449977], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:10449977..20896603, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..3351], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:3351..10453328], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:10453328..20903305]]}, projection=[ps_partkey, ps_suppkey] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_0.parquet:0..11328263, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_1.parquet:0..9740046], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_1.parquet:9740046..10585781, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_2.parquet:0..9946761, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_3.parquet:0..10275813], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_3.parquet:10275813..10620507, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..20723615], 
[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:20723615..20895812, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..20896109]]}, projection=[ps_partkey, ps_suppkey] RaySQL Plan =========== -Query Stage #0 (2 -> 4): +Query Stage #0 (4 -> 4): ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) ProjectionExec: expr=[s_suppkey@0 as s_suppkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: s_comment@1 LIKE %Customer%Complaints% - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_comment], predicate=s_comment@6 LIKE %Customer%Complaints% + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_comment], predicate=s_comment@6 LIKE %Customer%Complaints% -Query Stage #1 (2 -> 4): +Query Stage #1 (4 -> 4): ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) CoalesceBatchesExec: target_batch_size=8192 FilterExec: p_brand@1 != Brand#14 AND p_type@2 NOT LIKE SMALL PLATED% AND Use p_size@3 IN (SET) ([Literal { value: Int32(14) }, Literal { value: Int32(6) }, Literal { value: Int32(5) }, Literal { value: Int32(31) }, Literal { value: Int32(49) }, Literal { value: Int32(15) }, Literal { value: Int32(41) }, Literal { value: Int32(47) }]) - ParquetExec: file_groups={2 groups: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_brand, p_type, p_size], predicate=p_brand@3 != Brand#14 AND p_type@4 NOT LIKE SMALL PLATED% AND Use p_size@5 IN (SET) ([Literal { value: Int32(14) }, Literal { value: Int32(6) }, Literal { value: Int32(5) }, Literal { value: Int32(31) }, Literal { value: Int32(49) }, Literal { value: Int32(15) }, Literal { value: Int32(41) }, Literal { value: Int32(47) }]), pruning_predicate=CASE WHEN p_brand_null_count@2 = p_brand_row_count@3 THEN false ELSE p_brand_min@0 != Brand#14 OR Brand#14 != p_brand_max@1 END AND (CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 14 AND 14 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 6 AND 6 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 5 AND 5 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 31 AND 31 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 49 AND 49 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 15 AND 15 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 41 AND 41 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 47 AND 47 <= p_size_max@5 END), required_guarantees=[p_brand not in (Brand#14), p_size in (41, 14, 49, 6, 31, 15, 5, 47)] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet:0..3454243, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet:0..61547], 
[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet:61547..3452413, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_0.parquet:0..124924], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_0.parquet:124924..2056321, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_1.parquet:0..1526046, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_2.parquet:0..58347], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_2.parquet:58347..1788444, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_3.parquet:0..1785690]]}, projection=[p_partkey, p_brand, p_type, p_size], predicate=p_brand@3 != Brand#14 AND p_type@4 NOT LIKE SMALL PLATED% AND Use p_size@5 IN (SET) ([Literal { value: Int32(14) }, Literal { value: Int32(6) }, Literal { value: Int32(5) }, Literal { value: Int32(31) }, Literal { value: Int32(49) }, Literal { value: Int32(15) }, Literal { value: Int32(41) }, Literal { value: Int32(47) }]), pruning_predicate=CASE WHEN p_brand_null_count@2 = p_brand_row_count@3 THEN false ELSE p_brand_min@0 != Brand#14 OR Brand#14 != p_brand_max@1 END AND (CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 14 AND 14 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 6 AND 6 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 5 AND 5 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 31 AND 31 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 49 AND 49 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 15 AND 15 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = 
p_size_row_count@7 THEN false ELSE p_size_min@4 <= 41 AND 41 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 47 AND 47 <= p_size_max@5 END), required_guarantees=[p_brand not in (Brand#14), p_size in (6, 47, 5, 14, 15, 41, 49, 31)] Query Stage #2 (4 -> 4): ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10449977], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:10449977..20896603, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..3351], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:3351..10453328], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:10453328..20903305]]}, projection=[ps_partkey, ps_suppkey] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_0.parquet:0..11328263, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_1.parquet:0..9740046], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_1.parquet:9740046..10585781, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_2.parquet:0..9946761, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_3.parquet:0..10275813], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_3.parquet:10275813..10620507, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..20723615], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:20723615..20895812, 
home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..20896109]]}, projection=[ps_partkey, ps_suppkey] Query Stage #3 (4 -> 4): ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "ps_suppkey", index: 0 }], 4)) diff --git a/testdata/expected-plans/q17.txt b/testdata/expected-plans/q17.txt index 60c9796..e23b9ba 100644 --- a/testdata/expected-plans/q17.txt +++ b/testdata/expected-plans/q17.txt @@ -33,36 +33,35 @@ ProjectionExec: expr=[CAST(sum(lineitem.l_extendedprice)@0 AS Float64) / 7 as av ProjectionExec: expr=[p_partkey@0 as p_partkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: p_brand@1 = Brand#42 AND p_container@2 = LG BAG - RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_brand, p_container], predicate=p_brand@3 = Brand#42 AND p_container@6 = LG BAG, pruning_predicate=CASE WHEN p_brand_null_count@2 = p_brand_row_count@3 THEN false ELSE p_brand_min@0 <= Brand#42 AND Brand#42 <= p_brand_max@1 END AND CASE WHEN p_container_null_count@6 = p_container_row_count@7 THEN false ELSE p_container_min@4 <= LG BAG AND LG BAG <= p_container_max@5 END, required_guarantees=[p_brand in (Brand#42), p_container in (LG BAG)] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet:0..3454243, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet:0..61547], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet:61547..3452413, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_0.parquet:0..124924], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_0.parquet:124924..2056321, 
home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_1.parquet:0..1526046, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_2.parquet:0..58347], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_2.parquet:58347..1788444, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_3.parquet:0..1785690]]}, projection=[p_partkey, p_brand, p_container], predicate=p_brand@3 = Brand#42 AND p_container@6 = LG BAG, pruning_predicate=CASE WHEN p_brand_null_count@2 = p_brand_row_count@3 THEN false ELSE p_brand_min@0 <= Brand#42 AND Brand#42 <= p_brand_max@1 END AND CASE WHEN p_container_null_count@6 = p_container_row_count@7 THEN false ELSE p_container_min@4 <= LG BAG AND LG BAG <= p_container_max@5 END, required_guarantees=[p_brand in (Brand#42), p_container in (LG BAG)] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([l_partkey@0], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_partkey, l_quantity, l_extendedprice] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, 
home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_partkey, l_quantity, l_extendedprice] ProjectionExec: expr=[CAST(0.2 * CAST(avg(lineitem.l_quantity)@1 AS Float64) AS Decimal128(30, 15)) as Float64(0.2) * avg(lineitem.l_quantity), l_partkey@0 as l_partkey] AggregateExec: mode=FinalPartitioned, gby=[l_partkey@0 as l_partkey], aggr=[avg(lineitem.l_quantity)] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([l_partkey@0], 4), input_partitions=4 AggregateExec: mode=Partial, gby=[l_partkey@0 as l_partkey], aggr=[avg(lineitem.l_quantity)] - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_partkey, l_quantity] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, 
home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_partkey, l_quantity] RaySQL Plan =========== -Query Stage #0 (2 -> 4): +Query Stage #0 (4 -> 4): ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) ProjectionExec: expr=[p_partkey@0 as p_partkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: p_brand@1 = Brand#42 AND p_container@2 = LG BAG - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_brand, p_container], predicate=p_brand@3 = Brand#42 AND p_container@6 = LG BAG, pruning_predicate=CASE WHEN p_brand_null_count@2 = p_brand_row_count@3 THEN false ELSE p_brand_min@0 <= Brand#42 AND Brand#42 <= p_brand_max@1 END AND CASE WHEN p_container_null_count@6 = p_container_row_count@7 THEN false ELSE p_container_min@4 <= LG BAG AND LG BAG <= p_container_max@5 END, required_guarantees=[p_brand in (Brand#42), p_container in (LG BAG)] + ParquetExec: file_groups={4 groups: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet:0..3454243, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet:0..61547], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet:61547..3452413, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_0.parquet:0..124924], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_0.parquet:124924..2056321, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_1.parquet:0..1526046, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_2.parquet:0..58347], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_2.parquet:58347..1788444, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_3.parquet:0..1785690]]}, projection=[p_partkey, p_brand, p_container], predicate=p_brand@3 = Brand#42 AND p_container@6 = LG BAG, pruning_predicate=CASE WHEN p_brand_null_count@2 = p_brand_row_count@3 THEN false ELSE p_brand_min@0 <= Brand#42 AND Brand#42 <= p_brand_max@1 END AND CASE WHEN p_container_null_count@6 = p_container_row_count@7 THEN false ELSE p_container_min@4 <= LG BAG AND LG BAG <= p_container_max@5 END, required_guarantees=[p_brand in (Brand#42), p_container in (LG BAG)] Query Stage #1 (4 -> 4): ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "l_partkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], 
[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_partkey, l_quantity, l_extendedprice] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_partkey, l_quantity, l_extendedprice] Query Stage #2 (4 -> 4): ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "l_partkey", index: 0 }], 4)) AggregateExec: mode=Partial, gby=[l_partkey@0 as l_partkey], aggr=[avg(lineitem.l_quantity)] - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, 
projection=[l_partkey, l_quantity] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_partkey, l_quantity] Query Stage #3 (4 -> 1): ShuffleWriterExec(stage_id=3, output_partitioning=Hash([], 4)) diff --git a/testdata/expected-plans/q18.txt b/testdata/expected-plans/q18.txt index eac2add..1952e3b 100644 --- a/testdata/expected-plans/q18.txt +++ b/testdata/expected-plans/q18.txt @@ -37,7 +37,7 @@ GlobalLimitExec: skip=0, fetch=100 CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4 AggregateExec: mode=Partial, gby=[l_orderkey@0 as l_orderkey], aggr=[sum(lineitem.l_quantity)] - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], 
[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_quantity] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_orderkey, l_quantity] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@2, l_orderkey@0)], projection=[c_custkey@0, c_name@1, o_orderkey@2, o_totalprice@3, o_orderdate@4, l_quantity@6] CoalesceBatchesExec: target_batch_size=8192 @@ -46,13 +46,13 @@ GlobalLimitExec: skip=0, fetch=100 HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, c_name@1, o_orderkey@2, o_totalprice@4, o_orderdate@5] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([c_custkey@0], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..3391370], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:3391370..6782740], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6782740..6784675, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..3389435], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:3389435..6780803]]}, projection=[c_custkey, c_name] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_0.parquet:0..3472684, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_1.parquet:0..3399474], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_1.parquet:3399474..3842490, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_2.parquet:0..3498085, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_3.parquet:0..2931057], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_3.parquet:2931057..3110748, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..6692467], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6692467..6784093, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..6780531]]}, projection=[c_custkey, c_name] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([o_custkey@1], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], 
[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_custkey, o_totalprice, o_orderdate] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_0.parquet:0..13941205, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:0..13831262], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:13831262..13895674, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_2.parquet:0..14344446, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:0..13363609], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:13363609..14369581, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..26766495], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:26766495..27275418, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..27263542]]}, projection=[o_orderkey, o_custkey, o_totalprice, o_orderdate] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], 
[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_quantity] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_orderkey, l_quantity] RaySQL Plan =========== @@ -60,15 +60,15 @@ RaySQL Plan Query Stage #0 (4 -> 4): ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) AggregateExec: mode=Partial, gby=[l_orderkey@0 as l_orderkey], aggr=[sum(lineitem.l_quantity)] - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], 
[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_quantity] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_orderkey, l_quantity] Query Stage #1 (4 -> 4): ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..3391370], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:3391370..6782740], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6782740..6784675, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..3389435], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:3389435..6780803]]}, projection=[c_custkey, c_name] + ParquetExec: file_groups={4 groups: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_0.parquet:0..3472684, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_1.parquet:0..3399474], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_1.parquet:3399474..3842490, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_2.parquet:0..3498085, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_3.parquet:0..2931057], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_3.parquet:2931057..3110748, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..6692467], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6692467..6784093, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..6780531]]}, projection=[c_custkey, c_name] Query Stage #2 (4 -> 4): ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "o_custkey", index: 1 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_custkey, o_totalprice, o_orderdate] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_0.parquet:0..13941205, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:0..13831262], 
[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:13831262..13895674, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_2.parquet:0..14344446, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:0..13363609], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:13363609..14369581, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..26766495], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:26766495..27275418, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..27263542]]}, projection=[o_orderkey, o_custkey, o_totalprice, o_orderdate] Query Stage #3 (4 -> 4): ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "o_orderkey", index: 2 }], 4)) @@ -81,7 +81,7 @@ ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "o_orderk Query Stage #4 (4 -> 4): ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_quantity] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], 
[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_orderkey, l_quantity] Query Stage #5 (4 -> 4): ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "c_name", index: 0 }, Column { name: "c_custkey", index: 1 }, Column { name: "o_orderkey", index: 2 }, Column { name: "o_orderdate", index: 3 }, Column { name: "o_totalprice", index: 4 }], 4)) diff --git a/testdata/expected-plans/q19.txt b/testdata/expected-plans/q19.txt index 6f46143..de5b3a8 100644 --- a/testdata/expected-plans/q19.txt +++ b/testdata/expected-plans/q19.txt @@ -24,30 +24,29 @@ ProjectionExec: expr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_disco RepartitionExec: partitioning=Hash([p_partkey@0], 4), input_partitions=4 CoalesceBatchesExec: target_batch_size=8192 FilterExec: (p_brand@1 = Brand#21 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("SM CASE") }, Literal { value: Utf8("SM BOX") }, Literal { value: Utf8("SM PACK") }, Literal { value: Utf8("SM PKG") }]) AND p_size@2 <= 5 OR p_brand@1 = Brand#13 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("MED BAG") }, Literal { value: Utf8("MED BOX") }, Literal { value: Utf8("MED PKG") }, Literal { value: Utf8("MED PACK") }]) AND p_size@2 <= 10 OR p_brand@1 = Brand#52 AND Use p_container@3 IN (SET) ([Literal 
{ value: Utf8("LG CASE") }, Literal { value: Utf8("LG BOX") }, Literal { value: Utf8("LG PACK") }, Literal { value: Utf8("LG PKG") }]) AND p_size@2 <= 15) AND p_size@2 >= 1 - RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_brand, p_size, p_container], predicate=p_size@5 >= 1 AND (p_brand@3 = Brand#21 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("SM CASE") }, Literal { value: Utf8("SM BOX") }, Literal { value: Utf8("SM PACK") }, Literal { value: Utf8("SM PKG") }]) AND p_size@5 <= 5 OR p_brand@3 = Brand#13 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("MED BAG") }, Literal { value: Utf8("MED BOX") }, Literal { value: Utf8("MED PKG") }, Literal { value: Utf8("MED PACK") }]) AND p_size@5 <= 10 OR p_brand@3 = Brand#52 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("LG CASE") }, Literal { value: Utf8("LG BOX") }, Literal { value: Utf8("LG PACK") }, Literal { value: Utf8("LG PKG") }]) AND p_size@5 <= 15), pruning_predicate=CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_max@0 >= 1 END AND (CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#21 AND Brand#21 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM CASE AND SM CASE <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM BOX AND SM BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM PACK AND SM PACK <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM PKG AND SM PKG <= 
p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 5 END OR CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#13 AND Brand#13 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED BAG AND MED BAG <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED BOX AND MED BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED PKG AND MED PKG <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED PACK AND MED PACK <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 10 END OR CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#52 AND Brand#52 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG CASE AND LG CASE <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG BOX AND LG BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG PACK AND LG PACK <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG PKG AND LG PKG <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 15 END), required_guarantees=[] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet:0..3454243, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet:0..61547], 
[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet:61547..3452413, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_0.parquet:0..124924], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_0.parquet:124924..2056321, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_1.parquet:0..1526046, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_2.parquet:0..58347], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_2.parquet:58347..1788444, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_3.parquet:0..1785690]]}, projection=[p_partkey, p_brand, p_size, p_container], predicate=p_size@5 >= 1 AND (p_brand@3 = Brand#21 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("SM CASE") }, Literal { value: Utf8("SM BOX") }, Literal { value: Utf8("SM PACK") }, Literal { value: Utf8("SM PKG") }]) AND p_size@5 <= 5 OR p_brand@3 = Brand#13 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("MED BAG") }, Literal { value: Utf8("MED BOX") }, Literal { value: Utf8("MED PKG") }, Literal { value: Utf8("MED PACK") }]) AND p_size@5 <= 10 OR p_brand@3 = Brand#52 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("LG CASE") }, Literal { value: Utf8("LG BOX") }, Literal { value: Utf8("LG PACK") }, Literal { value: Utf8("LG PKG") }]) AND p_size@5 <= 15), pruning_predicate=CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_max@0 >= 1 END AND (CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#21 AND Brand#21 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM CASE AND SM CASE <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM BOX AND SM BOX <= 
p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM PACK AND SM PACK <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM PKG AND SM PKG <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 5 END OR CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#13 AND Brand#13 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED BAG AND MED BAG <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED BOX AND MED BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED PKG AND MED PKG <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED PACK AND MED PACK <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 10 END OR CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#52 AND Brand#52 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG CASE AND LG CASE <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG BOX AND LG BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG PACK AND LG PACK <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG PKG AND LG PKG <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = 
p_size_row_count@2 THEN false ELSE p_size_min@11 <= 15 END), required_guarantees=[] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([l_partkey@0], 4), input_partitions=4 ProjectionExec: expr=[l_partkey@0 as l_partkey, l_quantity@1 as l_quantity, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount] CoalesceBatchesExec: target_batch_size=8192 FilterExec: (l_quantity@1 >= Some(800),11,2 AND l_quantity@1 <= Some(1800),11,2 OR l_quantity@1 >= Some(2000),11,2 AND l_quantity@1 <= Some(3000),11,2 OR l_quantity@1 >= Some(3000),11,2 AND l_quantity@1 <= Some(4000),11,2) AND (l_shipmode@5 = AIR OR l_shipmode@5 = AIR REG) AND l_shipinstruct@4 = DELIVER IN PERSON - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_partkey, l_quantity, l_extendedprice, l_discount, l_shipinstruct, l_shipmode], predicate=(l_shipmode@14 = AIR OR l_shipmode@14 = AIR REG) AND l_shipinstruct@13 = DELIVER IN PERSON AND (l_quantity@4 >= Some(800),11,2 AND l_quantity@4 <= Some(1800),11,2 OR l_quantity@4 >= Some(2000),11,2 AND l_quantity@4 <= Some(3000),11,2 OR l_quantity@4 >= Some(3000),11,2 AND l_quantity@4 <= Some(4000),11,2), pruning_predicate=(CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= AIR AND AIR <= l_shipmode_max@1 END OR CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= AIR REG AND AIR REG <= l_shipmode_max@1 END) AND CASE WHEN l_shipinstruct_null_count@6 = 
l_shipinstruct_row_count@7 THEN false ELSE l_shipinstruct_min@4 <= DELIVER IN PERSON AND DELIVER IN PERSON <= l_shipinstruct_max@5 END AND (CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(800),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(1800),11,2 END OR CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(2000),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(3000),11,2 END OR CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(3000),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(4000),11,2 END), required_guarantees=[l_shipmode in (AIR REG, AIR), l_shipinstruct in (DELIVER IN PERSON)] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_partkey, l_quantity, 
l_extendedprice, l_discount, l_shipinstruct, l_shipmode], predicate=(l_shipmode@14 = AIR OR l_shipmode@14 = AIR REG) AND l_shipinstruct@13 = DELIVER IN PERSON AND (l_quantity@4 >= Some(800),11,2 AND l_quantity@4 <= Some(1800),11,2 OR l_quantity@4 >= Some(2000),11,2 AND l_quantity@4 <= Some(3000),11,2 OR l_quantity@4 >= Some(3000),11,2 AND l_quantity@4 <= Some(4000),11,2), pruning_predicate=(CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= AIR AND AIR <= l_shipmode_max@1 END OR CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= AIR REG AND AIR REG <= l_shipmode_max@1 END) AND CASE WHEN l_shipinstruct_null_count@6 = l_shipinstruct_row_count@7 THEN false ELSE l_shipinstruct_min@4 <= DELIVER IN PERSON AND DELIVER IN PERSON <= l_shipinstruct_max@5 END AND (CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(800),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(1800),11,2 END OR CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(2000),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(3000),11,2 END OR CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(3000),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(4000),11,2 END), required_guarantees=[l_shipmode in (AIR REG, AIR), l_shipinstruct in (DELIVER IN PERSON)] RaySQL Plan =========== -Query Stage #0 (2 -> 4): +Query Stage #0 (4 -> 4): ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) CoalesceBatchesExec: target_batch_size=8192 FilterExec: (p_brand@1 = Brand#21 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("SM CASE") }, Literal { 
value: Utf8("SM BOX") }, Literal { value: Utf8("SM PACK") }, Literal { value: Utf8("SM PKG") }]) AND p_size@2 <= 5 OR p_brand@1 = Brand#13 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("MED BAG") }, Literal { value: Utf8("MED BOX") }, Literal { value: Utf8("MED PKG") }, Literal { value: Utf8("MED PACK") }]) AND p_size@2 <= 10 OR p_brand@1 = Brand#52 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("LG CASE") }, Literal { value: Utf8("LG BOX") }, Literal { value: Utf8("LG PACK") }, Literal { value: Utf8("LG PKG") }]) AND p_size@2 <= 15) AND p_size@2 >= 1 - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_brand, p_size, p_container], predicate=p_size@5 >= 1 AND (p_brand@3 = Brand#21 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("SM CASE") }, Literal { value: Utf8("SM BOX") }, Literal { value: Utf8("SM PACK") }, Literal { value: Utf8("SM PKG") }]) AND p_size@5 <= 5 OR p_brand@3 = Brand#13 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("MED BAG") }, Literal { value: Utf8("MED BOX") }, Literal { value: Utf8("MED PKG") }, Literal { value: Utf8("MED PACK") }]) AND p_size@5 <= 10 OR p_brand@3 = Brand#52 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("LG CASE") }, Literal { value: Utf8("LG BOX") }, Literal { value: Utf8("LG PACK") }, Literal { value: Utf8("LG PKG") }]) AND p_size@5 <= 15), pruning_predicate=CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_max@0 >= 1 END AND (CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#21 AND Brand#21 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM CASE AND SM CASE <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE 
p_container_min@7 <= SM BOX AND SM BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM PACK AND SM PACK <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM PKG AND SM PKG <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 5 END OR CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#13 AND Brand#13 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED BAG AND MED BAG <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED BOX AND MED BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED PKG AND MED PKG <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED PACK AND MED PACK <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 10 END OR CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#52 AND Brand#52 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG CASE AND LG CASE <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG BOX AND LG BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG PACK AND LG PACK <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG PKG AND LG PKG <= p_container_max@8 END) AND CASE 
WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 15 END), required_guarantees=[] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet:0..3454243, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet:0..61547], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet:61547..3452413, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_0.parquet:0..124924], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_0.parquet:124924..2056321, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_1.parquet:0..1526046, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_2.parquet:0..58347], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_2.parquet:58347..1788444, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_3.parquet:0..1785690]]}, projection=[p_partkey, p_brand, p_size, p_container], predicate=p_size@5 >= 1 AND (p_brand@3 = Brand#21 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("SM CASE") }, Literal { value: Utf8("SM BOX") }, Literal { value: Utf8("SM PACK") }, Literal { value: Utf8("SM PKG") }]) AND p_size@5 <= 5 OR p_brand@3 = Brand#13 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("MED BAG") }, Literal { value: Utf8("MED BOX") }, Literal { value: Utf8("MED PKG") }, Literal { value: Utf8("MED PACK") }]) AND p_size@5 <= 10 OR p_brand@3 = Brand#52 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("LG CASE") }, Literal { value: Utf8("LG BOX") }, Literal { value: Utf8("LG PACK") }, Literal { value: Utf8("LG PKG") }]) AND p_size@5 <= 15), pruning_predicate=CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_max@0 >= 1 END AND (CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE 
p_brand_min@3 <= Brand#21 AND Brand#21 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM CASE AND SM CASE <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM BOX AND SM BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM PACK AND SM PACK <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM PKG AND SM PKG <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 5 END OR CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#13 AND Brand#13 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED BAG AND MED BAG <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED BOX AND MED BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED PKG AND MED PKG <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED PACK AND MED PACK <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 10 END OR CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#52 AND Brand#52 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG CASE AND LG CASE <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG BOX AND LG BOX <= p_container_max@8 END OR CASE WHEN 
p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG PACK AND LG PACK <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG PKG AND LG PKG <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 15 END), required_guarantees=[] Query Stage #1 (4 -> 4): ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "l_partkey", index: 0 }], 4)) ProjectionExec: expr=[l_partkey@0 as l_partkey, l_quantity@1 as l_quantity, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount] CoalesceBatchesExec: target_batch_size=8192 FilterExec: (l_quantity@1 >= Some(800),11,2 AND l_quantity@1 <= Some(1800),11,2 OR l_quantity@1 >= Some(2000),11,2 AND l_quantity@1 <= Some(3000),11,2 OR l_quantity@1 >= Some(3000),11,2 AND l_quantity@1 <= Some(4000),11,2) AND (l_shipmode@5 = AIR OR l_shipmode@5 = AIR REG) AND l_shipinstruct@4 = DELIVER IN PERSON - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_partkey, l_quantity, l_extendedprice, l_discount, l_shipinstruct, l_shipmode], predicate=(l_shipmode@14 = AIR OR l_shipmode@14 = AIR REG) AND l_shipinstruct@13 = DELIVER IN PERSON AND (l_quantity@4 >= Some(800),11,2 AND l_quantity@4 <= Some(1800),11,2 OR l_quantity@4 >= Some(2000),11,2 AND l_quantity@4 <= Some(3000),11,2 OR l_quantity@4 >= Some(3000),11,2 AND l_quantity@4 <= Some(4000),11,2), 
pruning_predicate=(CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= AIR AND AIR <= l_shipmode_max@1 END OR CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= AIR REG AND AIR REG <= l_shipmode_max@1 END) AND CASE WHEN l_shipinstruct_null_count@6 = l_shipinstruct_row_count@7 THEN false ELSE l_shipinstruct_min@4 <= DELIVER IN PERSON AND DELIVER IN PERSON <= l_shipinstruct_max@5 END AND (CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(800),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(1800),11,2 END OR CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(2000),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(3000),11,2 END OR CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(3000),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(4000),11,2 END), required_guarantees=[l_shipmode in (AIR REG, AIR), l_shipinstruct in (DELIVER IN PERSON)] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, 
home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_partkey, l_quantity, l_extendedprice, l_discount, l_shipinstruct, l_shipmode], predicate=(l_shipmode@14 = AIR OR l_shipmode@14 = AIR REG) AND l_shipinstruct@13 = DELIVER IN PERSON AND (l_quantity@4 >= Some(800),11,2 AND l_quantity@4 <= Some(1800),11,2 OR l_quantity@4 >= Some(2000),11,2 AND l_quantity@4 <= Some(3000),11,2 OR l_quantity@4 >= Some(3000),11,2 AND l_quantity@4 <= Some(4000),11,2), pruning_predicate=(CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= AIR AND AIR <= l_shipmode_max@1 END OR CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= AIR REG AND AIR REG <= l_shipmode_max@1 END) AND CASE WHEN l_shipinstruct_null_count@6 = l_shipinstruct_row_count@7 THEN false ELSE l_shipinstruct_min@4 <= DELIVER IN PERSON AND DELIVER IN PERSON <= l_shipinstruct_max@5 END AND (CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(800),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(1800),11,2 END OR CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(2000),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(3000),11,2 END OR CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(3000),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(4000),11,2 END), required_guarantees=[l_shipmode in (AIR REG, AIR), l_shipinstruct in (DELIVER IN PERSON)] 
Query Stage #2 (4 -> 1): ShuffleWriterExec(stage_id=2, output_partitioning=Hash([], 4)) diff --git a/testdata/expected-plans/q2.txt b/testdata/expected-plans/q2.txt index 16213f0..e19e372 100644 --- a/testdata/expected-plans/q2.txt +++ b/testdata/expected-plans/q2.txt @@ -53,19 +53,19 @@ GlobalLimitExec: skip=0, fetch=100 HashJoinExec: mode=Partitioned, join_type=Inner, on=[(r_regionkey@0, n_regionkey@9)], projection=[p_partkey@1, p_mfgr@2, s_name@3, s_address@4, s_phone@5, s_acctbal@6, s_comment@7, ps_supplycost@8, n_name@9] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([r_regionkey@0], 4), input_partitions=4 - RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 ProjectionExec: expr=[r_regionkey@0 as r_regionkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: r_name@1 = ASIA - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet/part1.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = ASIA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= ASIA AND ASIA <= r_name_max@1 END, required_guarantees=[r_name in (ASIA)] + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet/8NlZWnksIQjnYDPs_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet/part1.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = ASIA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= ASIA AND ASIA <= r_name_max@1 END, required_guarantees=[r_name in (ASIA)] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([n_regionkey@9], 4), input_partitions=4 ProjectionExec: expr=[p_partkey@2 as p_partkey, p_mfgr@3 as p_mfgr, s_name@4 as s_name, s_address@5 as s_address, s_phone@6 as s_phone, 
s_acctbal@7 as s_acctbal, s_comment@8 as s_comment, ps_supplycost@9 as ps_supplycost, n_name@0 as n_name, n_regionkey@1 as n_regionkey] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@4)], projection=[n_name@1, n_regionkey@2, p_partkey@3, p_mfgr@4, s_name@5, s_address@6, s_phone@8, s_acctbal@9, s_comment@10, ps_supplycost@11] CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=1 - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name, n_regionkey] + RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=2 + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/IftthSxZMytggJzA_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name, n_regionkey] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([s_nationkey@4], 4), input_partitions=4 ProjectionExec: expr=[p_partkey@6 as p_partkey, p_mfgr@7 as p_mfgr, s_name@0 as s_name, s_address@1 as s_address, s_nationkey@2 as s_nationkey, s_phone@3 as s_phone, s_acctbal@4 as s_acctbal, s_comment@5 as s_comment, ps_supplycost@8 as ps_supplycost] @@ -73,8 +73,7 @@ GlobalLimitExec: skip=0, fetch=100 HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@2)], projection=[s_name@1, s_address@2, s_nationkey@3, s_phone@4, s_acctbal@5, s_comment@6, p_partkey@7, p_mfgr@8, ps_supplycost@10] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=4 - RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], 
[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([ps_suppkey@2], 4), input_partitions=4 CoalesceBatchesExec: target_batch_size=8192 @@ -84,11 +83,10 @@ GlobalLimitExec: skip=0, fetch=100 ProjectionExec: expr=[p_partkey@0 as p_partkey, p_mfgr@1 as p_mfgr] CoalesceBatchesExec: target_batch_size=8192 FilterExec: p_size@3 = 48 AND p_type@2 LIKE %TIN - RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_mfgr, p_type, p_size], predicate=p_size@5 = 48 AND p_type@4 LIKE %TIN, pruning_predicate=CASE WHEN p_size_null_count@2 = p_size_row_count@3 THEN false ELSE p_size_min@0 <= 48 AND 48 <= p_size_max@1 END, required_guarantees=[p_size in (48)] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet:0..3454243, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet:0..61547], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet:61547..3452413, 
home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_0.parquet:0..124924], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_0.parquet:124924..2056321, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_1.parquet:0..1526046, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_2.parquet:0..58347], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_2.parquet:58347..1788444, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_3.parquet:0..1785690]]}, projection=[p_partkey, p_mfgr, p_type, p_size], predicate=p_size@5 = 48 AND p_type@4 LIKE %TIN, pruning_predicate=CASE WHEN p_size_null_count@2 = p_size_row_count@3 THEN false ELSE p_size_min@0 <= 48 AND 48 <= p_size_max@1 END, required_guarantees=[p_size in (48)] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([ps_partkey@0], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10449977], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:10449977..20896603, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..3351], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:3351..10453328], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:10453328..20903305]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_0.parquet:0..11328263, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_1.parquet:0..9740046], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_1.parquet:9740046..10585781, 
home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_2.parquet:0..9946761, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_3.parquet:0..10275813], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_3.parquet:10275813..10620507, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..20723615], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:20723615..20895812, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..20896109]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([ps_partkey@1, min(partsupp.ps_supplycost)@0], 4), input_partitions=4 ProjectionExec: expr=[min(partsupp.ps_supplycost)@1 as min(partsupp.ps_supplycost), ps_partkey@0 as ps_partkey] @@ -100,19 +98,19 @@ GlobalLimitExec: skip=0, fetch=100 HashJoinExec: mode=Partitioned, join_type=Inner, on=[(r_regionkey@0, n_regionkey@2)], projection=[ps_partkey@1, ps_supplycost@2] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([r_regionkey@0], 4), input_partitions=4 - RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 ProjectionExec: expr=[r_regionkey@0 as r_regionkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: r_name@1 = ASIA - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet/part1.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = ASIA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= ASIA AND ASIA <= r_name_max@1 END, required_guarantees=[r_name in (ASIA)] + ParquetExec: file_groups={2 groups: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet/8NlZWnksIQjnYDPs_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet/part1.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = ASIA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= ASIA AND ASIA <= r_name_max@1 END, required_guarantees=[r_name in (ASIA)] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([n_regionkey@2], 4), input_partitions=4 ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_supplycost@2 as ps_supplycost, n_regionkey@0 as n_regionkey] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[n_regionkey@1, ps_partkey@2, ps_supplycost@3] CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=1 - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_regionkey] + RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=2 + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/IftthSxZMytggJzA_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_regionkey] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([s_nationkey@2], 4), input_partitions=4 ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_supplycost@2 as ps_supplycost, s_nationkey@0 as s_nationkey] @@ -120,40 +118,39 @@ GlobalLimitExec: skip=0, fetch=100 HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@1)], projection=[s_nationkey@1, ps_partkey@2, ps_supplycost@4] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([s_suppkey@0], 4), 
input_partitions=4 - RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([ps_suppkey@1], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10449977], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:10449977..20896603, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..3351], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:3351..10453328], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:10453328..20903305]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_0.parquet:0..11328263, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_1.parquet:0..9740046], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_1.parquet:9740046..10585781, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_2.parquet:0..9946761, 
home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_3.parquet:0..10275813], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_3.parquet:10275813..10620507, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..20723615], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:20723615..20895812, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..20896109]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] RaySQL Plan =========== -Query Stage #0 (1 -> 4): +Query Stage #0 (2 -> 4): ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "r_regionkey", index: 0 }], 4)) ProjectionExec: expr=[r_regionkey@0 as r_regionkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: r_name@1 = ASIA - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet/part1.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = ASIA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= ASIA AND ASIA <= r_name_max@1 END, required_guarantees=[r_name in (ASIA)] + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet/8NlZWnksIQjnYDPs_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet/part1.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = ASIA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= ASIA AND ASIA <= r_name_max@1 END, required_guarantees=[r_name in (ASIA)] -Query Stage #1 (1 -> 4): +Query Stage #1 (2 -> 4): ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, 
projection=[n_nationkey, n_name, n_regionkey] + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/IftthSxZMytggJzA_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name, n_regionkey] -Query Stage #2 (2 -> 4): +Query Stage #2 (4 -> 4): ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment] -Query Stage #3 (2 -> 4): +Query Stage #3 (4 -> 4): ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) ProjectionExec: expr=[p_partkey@0 as p_partkey, p_mfgr@1 as p_mfgr] CoalesceBatchesExec: target_batch_size=8192 FilterExec: p_size@3 = 48 AND p_type@2 LIKE %TIN - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_mfgr, p_type, p_size], predicate=p_size@5 = 48 AND p_type@4 LIKE %TIN, pruning_predicate=CASE WHEN p_size_null_count@2 = p_size_row_count@3 THEN false ELSE p_size_min@0 <= 48 AND 48 <= 
p_size_max@1 END, required_guarantees=[p_size in (48)] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet:0..3454243, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet:0..61547], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet:61547..3452413, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_0.parquet:0..124924], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_0.parquet:124924..2056321, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_1.parquet:0..1526046, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_2.parquet:0..58347], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_2.parquet:58347..1788444, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_3.parquet:0..1785690]]}, projection=[p_partkey, p_mfgr, p_type, p_size], predicate=p_size@5 = 48 AND p_type@4 LIKE %TIN, pruning_predicate=CASE WHEN p_size_null_count@2 = p_size_row_count@3 THEN false ELSE p_size_min@0 <= 48 AND 48 <= p_size_max@1 END, required_guarantees=[p_size in (48)] Query Stage #4 (4 -> 4): ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10449977], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:10449977..20896603, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..3351], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:3351..10453328], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:10453328..20903305]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] + 
ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_0.parquet:0..11328263, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_1.parquet:0..9740046], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_1.parquet:9740046..10585781, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_2.parquet:0..9946761, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_3.parquet:0..10275813], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_3.parquet:10275813..10620507, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..20723615], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:20723615..20895812, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..20896109]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] Query Stage #5 (4 -> 4): ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "ps_suppkey", index: 2 }], 4)) @@ -193,24 +190,24 @@ ShuffleWriterExec(stage_id=8, output_partitioning=Hash([Column { name: "p_partke CoalesceBatchesExec: target_batch_size=8192 ShuffleReaderExec(stage_id=7, input_partitioning=Hash([Column { name: "n_regionkey", index: 9 }], 4)) -Query Stage #9 (1 -> 4): +Query Stage #9 (2 -> 4): ShuffleWriterExec(stage_id=9, output_partitioning=Hash([Column { name: "r_regionkey", index: 0 }], 4)) ProjectionExec: expr=[r_regionkey@0 as r_regionkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: r_name@1 = ASIA - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet/part1.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = ASIA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= 
ASIA AND ASIA <= r_name_max@1 END, required_guarantees=[r_name in (ASIA)] + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet/8NlZWnksIQjnYDPs_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet/part1.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = ASIA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= ASIA AND ASIA <= r_name_max@1 END, required_guarantees=[r_name in (ASIA)] -Query Stage #10 (1 -> 4): +Query Stage #10 (2 -> 4): ShuffleWriterExec(stage_id=10, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_regionkey] + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/IftthSxZMytggJzA_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_regionkey] -Query Stage #11 (2 -> 4): +Query Stage #11 (4 -> 4): ShuffleWriterExec(stage_id=11, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] Query Stage #12 (4 -> 4): 
ShuffleWriterExec(stage_id=12, output_partitioning=Hash([Column { name: "ps_suppkey", index: 1 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10449977], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:10449977..20896603, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..3351], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:3351..10453328], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:10453328..20903305]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_0.parquet:0..11328263, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_1.parquet:0..9740046], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_1.parquet:9740046..10585781, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_2.parquet:0..9946761, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_3.parquet:0..10275813], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_3.parquet:10275813..10620507, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..20723615], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:20723615..20895812, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..20896109]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] Query Stage #13 (4 -> 4): ShuffleWriterExec(stage_id=13, output_partitioning=Hash([Column { name: "s_nationkey", index: 2 }], 4)) diff --git a/testdata/expected-plans/q20.txt b/testdata/expected-plans/q20.txt index 9e495cc..4adb69a 100644 --- 
a/testdata/expected-plans/q20.txt +++ b/testdata/expected-plans/q20.txt @@ -39,15 +39,14 @@ SortPreservingMergeExec: [s_name@0 ASC NULLS LAST] HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@3)], projection=[s_suppkey@1, s_name@2, s_address@3] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=4 - RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 ProjectionExec: expr=[n_nationkey@0 as n_nationkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: n_name@1 = KENYA - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = KENYA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= KENYA AND KENYA <= n_name_max@1 END, required_guarantees=[n_name in (KENYA)] + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/IftthSxZMytggJzA_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = KENYA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= KENYA AND KENYA <= n_name_max@1 END, required_guarantees=[n_name in (KENYA)] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([s_nationkey@3], 4), input_partitions=4 - RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_name, s_address, s_nationkey] + ParquetExec: file_groups={4 groups: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_name, s_address, s_nationkey] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([ps_suppkey@0], 4), input_partitions=4 CoalesceBatchesExec: target_batch_size=8192 @@ -61,11 +60,10 @@ SortPreservingMergeExec: [s_name@0 ASC NULLS LAST] ProjectionExec: expr=[p_partkey@0 as p_partkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: p_name@1 LIKE blanched% - RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_name], predicate=p_name@1 LIKE blanched% + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet:0..3454243, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet:0..61547], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet:61547..3452413, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_0.parquet:0..124924], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_0.parquet:124924..2056321, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_1.parquet:0..1526046, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_2.parquet:0..58347], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_2.parquet:58347..1788444, 
home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_3.parquet:0..1785690]]}, projection=[p_partkey, p_name], predicate=p_name@1 LIKE blanched% CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([ps_partkey@0], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10449977], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:10449977..20896603, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..3351], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:3351..10453328], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:10453328..20903305]]}, projection=[ps_partkey, ps_suppkey, ps_availqty] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_0.parquet:0..11328263, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_1.parquet:0..9740046], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_1.parquet:9740046..10585781, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_2.parquet:0..9946761, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_3.parquet:0..10275813], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_3.parquet:10275813..10620507, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..20723615], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:20723615..20895812, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..20896109]]}, projection=[ps_partkey, ps_suppkey, ps_availqty] ProjectionExec: expr=[0.5 * CAST(sum(lineitem.l_quantity)@2 
AS Float64) as Float64(0.5) * sum(lineitem.l_quantity), l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey] AggregateExec: mode=FinalPartitioned, gby=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey], aggr=[sum(lineitem.l_quantity)] CoalesceBatchesExec: target_batch_size=8192 @@ -74,21 +72,21 @@ SortPreservingMergeExec: [s_name@0 ASC NULLS LAST] ProjectionExec: expr=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey, l_quantity@2 as l_quantity] CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_shipdate@3 >= 1993-01-01 AND l_shipdate@3 < 1994-01-01 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_partkey, l_suppkey, l_quantity, l_shipdate], predicate=l_shipdate@10 >= 1993-01-01 AND l_shipdate@10 < 1994-01-01, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1993-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1994-01-01 END, required_guarantees=[] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, 
home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_partkey, l_suppkey, l_quantity, l_shipdate], predicate=l_shipdate@10 >= 1993-01-01 AND l_shipdate@10 < 1994-01-01, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1993-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1994-01-01 END, required_guarantees=[] RaySQL Plan =========== -Query Stage #0 (1 -> 4): +Query Stage #0 (2 -> 4): ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) ProjectionExec: expr=[n_nationkey@0 as n_nationkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: n_name@1 = KENYA - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = KENYA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= KENYA AND KENYA <= n_name_max@1 END, required_guarantees=[n_name in (KENYA)] + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/IftthSxZMytggJzA_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = KENYA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= KENYA AND KENYA <= 
n_name_max@1 END, required_guarantees=[n_name in (KENYA)] -Query Stage #1 (2 -> 4): +Query Stage #1 (4 -> 4): ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "s_nationkey", index: 3 }], 4)) - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_name, s_address, s_nationkey] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_name, s_address, s_nationkey] Query Stage #2 (4 -> 4): ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) @@ -99,16 +97,16 @@ ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "s_suppke CoalesceBatchesExec: target_batch_size=8192 ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "s_nationkey", index: 3 }], 4)) -Query Stage #3 (2 -> 4): +Query Stage #3 (4 -> 4): ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) ProjectionExec: expr=[p_partkey@0 as p_partkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: p_name@1 LIKE blanched% - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_name], predicate=p_name@1 LIKE blanched% + ParquetExec: file_groups={4 groups: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet:0..3454243, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet:0..61547], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet:61547..3452413, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_0.parquet:0..124924], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_0.parquet:124924..2056321, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_1.parquet:0..1526046, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_2.parquet:0..58347], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_2.parquet:58347..1788444, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_3.parquet:0..1785690]]}, projection=[p_partkey, p_name], predicate=p_name@1 LIKE blanched% Query Stage #4 (4 -> 4): ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10449977], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:10449977..20896603, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..3351], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:3351..10453328], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:10453328..20903305]]}, projection=[ps_partkey, ps_suppkey, ps_availqty] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_0.parquet:0..11328263, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_1.parquet:0..9740046], 
[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_1.parquet:9740046..10585781, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_2.parquet:0..9946761, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_3.parquet:0..10275813], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_3.parquet:10275813..10620507, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..20723615], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:20723615..20895812, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..20896109]]}, projection=[ps_partkey, ps_suppkey, ps_availqty] Query Stage #5 (4 -> 4): ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "ps_partkey", index: 0 }, Column { name: "ps_suppkey", index: 1 }], 4)) @@ -125,7 +123,7 @@ ShuffleWriterExec(stage_id=6, output_partitioning=Hash([Column { name: "l_partke ProjectionExec: expr=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey, l_quantity@2 as l_quantity] CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_shipdate@3 >= 1993-01-01 AND l_shipdate@3 < 1994-01-01 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_partkey, l_suppkey, l_quantity, l_shipdate], predicate=l_shipdate@10 >= 1993-01-01 AND l_shipdate@10 < 1994-01-01, pruning_predicate=CASE WHEN 
l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1993-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1994-01-01 END, required_guarantees=[] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_partkey, l_suppkey, l_quantity, l_shipdate], predicate=l_shipdate@10 >= 1993-01-01 AND l_shipdate@10 < 1994-01-01, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1993-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1994-01-01 END, required_guarantees=[] Query Stage #7 (4 -> 4): ShuffleWriterExec(stage_id=7, output_partitioning=Hash([Column { name: "ps_suppkey", index: 0 }], 4)) diff --git a/testdata/expected-plans/q21.txt b/testdata/expected-plans/q21.txt index f4a9f43..be86399 100644 --- a/testdata/expected-plans/q21.txt +++ b/testdata/expected-plans/q21.txt @@ -55,11 +55,11 @@ GlobalLimitExec: 
skip=0, fetch=100 HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@1)], projection=[s_name@1, l_orderkey@3, l_suppkey@4] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=4 - RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 ProjectionExec: expr=[n_nationkey@0 as n_nationkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: n_name@1 = ARGENTINA - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ARGENTINA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ARGENTINA AND ARGENTINA <= n_name_max@1 END, required_guarantees=[n_name in (ARGENTINA)] + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/IftthSxZMytggJzA_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ARGENTINA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ARGENTINA AND ARGENTINA <= n_name_max@1 END, required_guarantees=[n_name in (ARGENTINA)] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([s_nationkey@1], 4), input_partitions=4 CoalesceBatchesExec: target_batch_size=8192 @@ -69,58 +69,57 @@ GlobalLimitExec: skip=0, fetch=100 ProjectionExec: expr=[o_orderkey@0 as o_orderkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: o_orderstatus@1 = F - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], 
[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_orderstatus], predicate=o_orderstatus@2 = F, pruning_predicate=CASE WHEN o_orderstatus_null_count@2 = o_orderstatus_row_count@3 THEN false ELSE o_orderstatus_min@0 <= F AND F <= o_orderstatus_max@1 END, required_guarantees=[o_orderstatus in (F)] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_0.parquet:0..13941205, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:0..13831262], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:13831262..13895674, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_2.parquet:0..14344446, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:0..13363609], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:13363609..14369581, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..26766495], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:26766495..27275418, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..27263542]]}, projection=[o_orderkey, o_orderstatus], predicate=o_orderstatus@2 = F, pruning_predicate=CASE WHEN o_orderstatus_null_count@2 = o_orderstatus_row_count@3 THEN false ELSE o_orderstatus_min@0 <= F AND F <= o_orderstatus_max@1 END, required_guarantees=[o_orderstatus in (F)] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([l_orderkey@2], 4), input_partitions=4 CoalesceBatchesExec: 
target_batch_size=8192 HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1)], projection=[s_name@1, s_nationkey@2, l_orderkey@3, l_suppkey@4] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=4 - RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_name, s_nationkey] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_name, s_nationkey] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([l_suppkey@1], 4), input_partitions=4 ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_suppkey@1 as l_suppkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_receiptdate@3 > l_commitdate@2 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > 
l_commitdate@11 + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_suppkey] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, 
home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_orderkey, l_suppkey] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4 ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_suppkey@1 as l_suppkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_receiptdate@3 > l_commitdate@2 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, 
home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 RaySQL Plan =========== -Query Stage #0 (1 -> 4): +Query Stage #0 (2 -> 4): ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) ProjectionExec: expr=[n_nationkey@0 as n_nationkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: n_name@1 = ARGENTINA - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ARGENTINA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ARGENTINA AND ARGENTINA <= n_name_max@1 END, required_guarantees=[n_name in (ARGENTINA)] + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/IftthSxZMytggJzA_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ARGENTINA, pruning_predicate=CASE WHEN 
n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ARGENTINA AND ARGENTINA <= n_name_max@1 END, required_guarantees=[n_name in (ARGENTINA)] Query Stage #1 (4 -> 4): ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) ProjectionExec: expr=[o_orderkey@0 as o_orderkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: o_orderstatus@1 = F - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_orderstatus], predicate=o_orderstatus@2 = F, pruning_predicate=CASE WHEN o_orderstatus_null_count@2 = o_orderstatus_row_count@3 THEN false ELSE o_orderstatus_min@0 <= F AND F <= o_orderstatus_max@1 END, required_guarantees=[o_orderstatus in (F)] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_0.parquet:0..13941205, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:0..13831262], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:13831262..13895674, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_2.parquet:0..14344446, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:0..13363609], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:13363609..14369581, 
home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..26766495], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:26766495..27275418, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..27263542]]}, projection=[o_orderkey, o_orderstatus], predicate=o_orderstatus@2 = F, pruning_predicate=CASE WHEN o_orderstatus_null_count@2 = o_orderstatus_row_count@3 THEN false ELSE o_orderstatus_min@0 <= F AND F <= o_orderstatus_max@1 END, required_guarantees=[o_orderstatus in (F)] -Query Stage #2 (2 -> 4): +Query Stage #2 (4 -> 4): ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_name, s_nationkey] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_name, s_nationkey] Query Stage #3 (4 -> 4): ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "l_suppkey", index: 1 }], 4)) ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_suppkey@1 as l_suppkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_receiptdate@3 > l_commitdate@2 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, 
home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 Query Stage #4 (4 -> 4): ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "l_orderkey", index: 2 }], 4)) @@ -151,14 +150,14 @@ ShuffleWriterExec(stage_id=6, output_partitioning=Hash([Column { name: "l_orderk Query Stage #7 (4 -> 4): ShuffleWriterExec(stage_id=7, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_suppkey] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_orderkey, l_suppkey] Query Stage #8 (4 -> 4): ShuffleWriterExec(stage_id=8, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_suppkey@1 as l_suppkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_receiptdate@3 > l_commitdate@2 - ParquetExec: file_groups={4 groups: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 Query Stage #9 (4 -> 4): ShuffleWriterExec(stage_id=9, output_partitioning=Hash([Column { name: "s_name", index: 0 }], 4)) diff --git a/testdata/expected-plans/q22.txt b/testdata/expected-plans/q22.txt index b770c11..427795a 100644 --- 
a/testdata/expected-plans/q22.txt +++ b/testdata/expected-plans/q22.txt @@ -37,17 +37,17 @@ SortPreservingMergeExec: [cntrycode@0 ASC NULLS LAST] ProjectionExec: expr=[c_acctbal@1 as c_acctbal] CoalesceBatchesExec: target_batch_size=8192 FilterExec: c_acctbal@1 > Some(0),11,2 AND Use substr(c_phone@0, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..3391370], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:3391370..6782740], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6782740..6784675, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..3389435], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:3389435..6780803]]}, projection=[c_phone, c_acctbal], predicate=c_acctbal@5 > Some(0),11,2 AND Use substr(c_phone@4, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) AND c_acctbal@5 > Some(0),11,2, pruning_predicate=CASE WHEN c_acctbal_null_count@1 = c_acctbal_row_count@2 THEN false ELSE c_acctbal_max@0 > Some(0),11,2 END AND CASE WHEN c_acctbal_null_count@1 = c_acctbal_row_count@2 THEN false ELSE c_acctbal_max@0 > Some(0),11,2 END, required_guarantees=[] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_0.parquet:0..3472684, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_1.parquet:0..3399474], 
[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_1.parquet:3399474..3842490, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_2.parquet:0..3498085, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_3.parquet:0..2931057], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_3.parquet:2931057..3110748, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..6692467], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6692467..6784093, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..6780531]]}, projection=[c_phone, c_acctbal], predicate=c_acctbal@5 > Some(0),11,2 AND Use substr(c_phone@4, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) AND c_acctbal@5 > Some(0),11,2, pruning_predicate=CASE WHEN c_acctbal_null_count@1 = c_acctbal_row_count@2 THEN false ELSE c_acctbal_max@0 > Some(0),11,2 END AND CASE WHEN c_acctbal_null_count@1 = c_acctbal_row_count@2 THEN false ELSE c_acctbal_max@0 > Some(0),11,2 END, required_guarantees=[] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=Partitioned, join_type=LeftAnti, on=[(c_custkey@0, o_custkey@0)], projection=[c_phone@1, c_acctbal@2] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([c_custkey@0], 4), input_partitions=4 CoalesceBatchesExec: target_batch_size=8192 FilterExec: Use substr(c_phone@1, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) - ParquetExec: file_groups={4 
groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..3391370], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:3391370..6782740], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6782740..6784675, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..3389435], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:3389435..6780803]]}, projection=[c_custkey, c_phone, c_acctbal], predicate=Use substr(c_phone@4, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_0.parquet:0..3472684, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_1.parquet:0..3399474], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_1.parquet:3399474..3842490, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_2.parquet:0..3498085, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_3.parquet:0..2931057], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_3.parquet:2931057..3110748, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..6692467], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6692467..6784093, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..6780531]]}, projection=[c_custkey, c_phone, c_acctbal], predicate=Use substr(c_phone@4, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: 
Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([o_custkey@0], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_custkey] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_0.parquet:0..13941205, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:0..13831262], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:13831262..13895674, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_2.parquet:0..14344446, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:0..13363609], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:13363609..14369581, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..26766495], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:26766495..27275418, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..27263542]]}, projection=[o_custkey] RaySQL Plan =========== @@ -58,17 +58,17 @@ ShuffleWriterExec(stage_id=0, output_partitioning=UnknownPartitioning(4)) ProjectionExec: expr=[c_acctbal@1 as 
c_acctbal] CoalesceBatchesExec: target_batch_size=8192 FilterExec: c_acctbal@1 > Some(0),11,2 AND Use substr(c_phone@0, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..3391370], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:3391370..6782740], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6782740..6784675, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..3389435], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:3389435..6780803]]}, projection=[c_phone, c_acctbal], predicate=c_acctbal@5 > Some(0),11,2 AND Use substr(c_phone@4, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) AND c_acctbal@5 > Some(0),11,2, pruning_predicate=CASE WHEN c_acctbal_null_count@1 = c_acctbal_row_count@2 THEN false ELSE c_acctbal_max@0 > Some(0),11,2 END AND CASE WHEN c_acctbal_null_count@1 = c_acctbal_row_count@2 THEN false ELSE c_acctbal_max@0 > Some(0),11,2 END, required_guarantees=[] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_0.parquet:0..3472684, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_1.parquet:0..3399474], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_1.parquet:3399474..3842490, 
home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_2.parquet:0..3498085, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_3.parquet:0..2931057], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_3.parquet:2931057..3110748, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..6692467], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6692467..6784093, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..6780531]]}, projection=[c_phone, c_acctbal], predicate=c_acctbal@5 > Some(0),11,2 AND Use substr(c_phone@4, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) AND c_acctbal@5 > Some(0),11,2, pruning_predicate=CASE WHEN c_acctbal_null_count@1 = c_acctbal_row_count@2 THEN false ELSE c_acctbal_max@0 > Some(0),11,2 END AND CASE WHEN c_acctbal_null_count@1 = c_acctbal_row_count@2 THEN false ELSE c_acctbal_max@0 > Some(0),11,2 END, required_guarantees=[] Query Stage #1 (4 -> 4): ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) CoalesceBatchesExec: target_batch_size=8192 FilterExec: Use substr(c_phone@1, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..3391370], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:3391370..6782740], 
[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6782740..6784675, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..3389435], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:3389435..6780803]]}, projection=[c_custkey, c_phone, c_acctbal], predicate=Use substr(c_phone@4, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_0.parquet:0..3472684, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_1.parquet:0..3399474], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_1.parquet:3399474..3842490, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_2.parquet:0..3498085, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_3.parquet:0..2931057], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_3.parquet:2931057..3110748, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..6692467], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6692467..6784093, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..6780531]]}, projection=[c_custkey, c_phone, c_acctbal], predicate=Use substr(c_phone@4, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) Query Stage #2 (4 -> 4): ShuffleWriterExec(stage_id=2, 
output_partitioning=Hash([Column { name: "o_custkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_custkey] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_0.parquet:0..13941205, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:0..13831262], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:13831262..13895674, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_2.parquet:0..14344446, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:0..13363609], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:13363609..14369581, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..26766495], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:26766495..27275418, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..27263542]]}, projection=[o_custkey] Query Stage #3 (4 -> 4): ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "cntrycode", index: 0 }], 4)) diff --git a/testdata/expected-plans/q3.txt b/testdata/expected-plans/q3.txt index 2dc9f64..5fbfd5b 100644 --- a/testdata/expected-plans/q3.txt +++ b/testdata/expected-plans/q3.txt @@ -40,18 +40,18 @@ GlobalLimitExec: 
skip=0, fetch=10 ProjectionExec: expr=[c_custkey@0 as c_custkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: c_mktsegment@1 = BUILDING - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..3391370], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:3391370..6782740], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6782740..6784675, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..3389435], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:3389435..6780803]]}, projection=[c_custkey, c_mktsegment], predicate=c_mktsegment@6 = BUILDING, pruning_predicate=CASE WHEN c_mktsegment_null_count@2 = c_mktsegment_row_count@3 THEN false ELSE c_mktsegment_min@0 <= BUILDING AND BUILDING <= c_mktsegment_max@1 END, required_guarantees=[c_mktsegment in (BUILDING)] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_0.parquet:0..3472684, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_1.parquet:0..3399474], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_1.parquet:3399474..3842490, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_2.parquet:0..3498085, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_3.parquet:0..2931057], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_3.parquet:2931057..3110748, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..6692467], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6692467..6784093, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..6780531]]}, 
projection=[c_custkey, c_mktsegment], predicate=c_mktsegment@6 = BUILDING, pruning_predicate=CASE WHEN c_mktsegment_null_count@2 = c_mktsegment_row_count@3 THEN false ELSE c_mktsegment_min@0 <= BUILDING AND BUILDING <= c_mktsegment_max@1 END, required_guarantees=[c_mktsegment in (BUILDING)] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([o_custkey@1], 4), input_partitions=4 CoalesceBatchesExec: target_batch_size=8192 FilterExec: o_orderdate@2 < 1995-03-15 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_custkey, o_orderdate, o_shippriority], predicate=o_orderdate@4 < 1995-03-15, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@0 < 1995-03-15 END, required_guarantees=[] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_0.parquet:0..13941205, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:0..13831262], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:13831262..13895674, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_2.parquet:0..14344446, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:0..13363609], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:13363609..14369581, 
home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..26766495], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:26766495..27275418, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..27263542]]}, projection=[o_orderkey, o_custkey, o_orderdate, o_shippriority], predicate=o_orderdate@4 < 1995-03-15, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@0 < 1995-03-15 END, required_guarantees=[] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4 ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_shipdate@3 > 1995-03-15 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 > 1995-03-15, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 > 1995-03-15 END, required_guarantees=[] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], 
[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_orderkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 > 1995-03-15, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 > 1995-03-15 END, required_guarantees=[] RaySQL Plan =========== @@ -61,13 +61,13 @@ ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "c_custke ProjectionExec: expr=[c_custkey@0 as c_custkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: c_mktsegment@1 = BUILDING - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..3391370], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:3391370..6782740], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6782740..6784675, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..3389435], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:3389435..6780803]]}, projection=[c_custkey, c_mktsegment], predicate=c_mktsegment@6 = BUILDING, pruning_predicate=CASE WHEN c_mktsegment_null_count@2 = c_mktsegment_row_count@3 THEN false ELSE c_mktsegment_min@0 
<= BUILDING AND BUILDING <= c_mktsegment_max@1 END, required_guarantees=[c_mktsegment in (BUILDING)] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_0.parquet:0..3472684, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_1.parquet:0..3399474], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_1.parquet:3399474..3842490, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_2.parquet:0..3498085, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_3.parquet:0..2931057], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_3.parquet:2931057..3110748, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..6692467], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6692467..6784093, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..6780531]]}, projection=[c_custkey, c_mktsegment], predicate=c_mktsegment@6 = BUILDING, pruning_predicate=CASE WHEN c_mktsegment_null_count@2 = c_mktsegment_row_count@3 THEN false ELSE c_mktsegment_min@0 <= BUILDING AND BUILDING <= c_mktsegment_max@1 END, required_guarantees=[c_mktsegment in (BUILDING)] Query Stage #1 (4 -> 4): ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "o_custkey", index: 1 }], 4)) CoalesceBatchesExec: target_batch_size=8192 FilterExec: o_orderdate@2 < 1995-03-15 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, 
home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_custkey, o_orderdate, o_shippriority], predicate=o_orderdate@4 < 1995-03-15, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@0 < 1995-03-15 END, required_guarantees=[] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_0.parquet:0..13941205, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:0..13831262], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:13831262..13895674, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_2.parquet:0..14344446, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:0..13363609], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:13363609..14369581, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..26766495], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:26766495..27275418, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..27263542]]}, projection=[o_orderkey, o_custkey, o_orderdate, o_shippriority], predicate=o_orderdate@4 < 1995-03-15, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@0 < 1995-03-15 END, required_guarantees=[] Query Stage #2 (4 -> 4): ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) @@ -83,7 +83,7 @@ ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "l_orderk ProjectionExec: expr=[l_orderkey@0 as l_orderkey, 
l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_shipdate@3 > 1995-03-15 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 > 1995-03-15, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 > 1995-03-15 END, required_guarantees=[] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_orderkey, 
l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 > 1995-03-15, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 > 1995-03-15 END, required_guarantees=[] Query Stage #4 (4 -> 4): ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }, Column { name: "o_orderdate", index: 1 }, Column { name: "o_shippriority", index: 2 }], 4)) diff --git a/testdata/expected-plans/q4.txt b/testdata/expected-plans/q4.txt index 84642bd..a8e319d 100644 --- a/testdata/expected-plans/q4.txt +++ b/testdata/expected-plans/q4.txt @@ -31,13 +31,13 @@ SortPreservingMergeExec: [o_orderpriority@0 ASC NULLS LAST] ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_orderpriority@2 as o_orderpriority] CoalesceBatchesExec: target_batch_size=8192 FilterExec: o_orderdate@1 >= 1995-04-01 AND o_orderdate@1 < 1995-07-01 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_orderdate, o_orderpriority], predicate=o_orderdate@4 >= 1995-04-01 AND o_orderdate@4 < 1995-07-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1995-04-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1995-07-01 END, required_guarantees=[] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_0.parquet:0..13941205, 
home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:0..13831262], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:13831262..13895674, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_2.parquet:0..14344446, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:0..13363609], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:13363609..14369581, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..26766495], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:26766495..27275418, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..27263542]]}, projection=[o_orderkey, o_orderdate, o_orderpriority], predicate=o_orderdate@4 >= 1995-04-01 AND o_orderdate@4 < 1995-07-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1995-04-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1995-07-01 END, required_guarantees=[] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4 ProjectionExec: expr=[l_orderkey@0 as l_orderkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_receiptdate@2 > l_commitdate@1 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], 
[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_orderkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 RaySQL Plan =========== @@ -47,14 +47,14 @@ ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "o_orderk ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_orderpriority@2 as o_orderpriority] CoalesceBatchesExec: target_batch_size=8192 FilterExec: o_orderdate@1 >= 1995-04-01 AND o_orderdate@1 < 1995-07-01 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, 
home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_orderdate, o_orderpriority], predicate=o_orderdate@4 >= 1995-04-01 AND o_orderdate@4 < 1995-07-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1995-04-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1995-07-01 END, required_guarantees=[] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_0.parquet:0..13941205, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:0..13831262], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:13831262..13895674, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_2.parquet:0..14344446, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:0..13363609], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:13363609..14369581, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..26766495], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:26766495..27275418, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..27263542]]}, projection=[o_orderkey, o_orderdate, o_orderpriority], predicate=o_orderdate@4 >= 1995-04-01 AND o_orderdate@4 < 1995-07-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1995-04-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1995-07-01 END, required_guarantees=[] Query Stage 
#1 (4 -> 4): ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) ProjectionExec: expr=[l_orderkey@0 as l_orderkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_receiptdate@2 > l_commitdate@1 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_orderkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > 
l_commitdate@11 Query Stage #2 (4 -> 4): ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "o_orderpriority", index: 0 }], 4)) diff --git a/testdata/expected-plans/q5.txt b/testdata/expected-plans/q5.txt index a0a761d..f6270c5 100644 --- a/testdata/expected-plans/q5.txt +++ b/testdata/expected-plans/q5.txt @@ -39,19 +39,19 @@ SortPreservingMergeExec: [revenue@1 DESC] HashJoinExec: mode=Partitioned, join_type=Inner, on=[(r_regionkey@0, n_regionkey@3)], projection=[l_extendedprice@1, l_discount@2, n_name@3] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([r_regionkey@0], 4), input_partitions=4 - RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 ProjectionExec: expr=[r_regionkey@0 as r_regionkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: r_name@1 = AFRICA - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet/part1.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = AFRICA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= AFRICA AND AFRICA <= r_name_max@1 END, required_guarantees=[r_name in (AFRICA)] + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet/8NlZWnksIQjnYDPs_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet/part1.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = AFRICA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= AFRICA AND AFRICA <= r_name_max@1 END, required_guarantees=[r_name in (AFRICA)] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([n_regionkey@3], 4), input_partitions=4 ProjectionExec: expr=[l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, n_name@0 as n_name, n_regionkey@1 as 
n_regionkey] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[n_name@1, n_regionkey@2, l_extendedprice@3, l_discount@4] CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=1 - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name, n_regionkey] + RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=2 + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/IftthSxZMytggJzA_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name, n_regionkey] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([s_nationkey@2], 4), input_partitions=4 ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@0 as s_nationkey] @@ -59,8 +59,7 @@ SortPreservingMergeExec: [revenue@1 DESC] HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1), (s_nationkey@1, c_nationkey@0)], projection=[s_nationkey@1, l_extendedprice@4, l_discount@5] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([s_suppkey@0, s_nationkey@1], 4), input_partitions=4 - RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_0.parquet], 
[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([l_suppkey@1, c_nationkey@0], 4), input_partitions=4 CoalesceBatchesExec: target_batch_size=8192 @@ -71,45 +70,45 @@ SortPreservingMergeExec: [revenue@1 DESC] HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_nationkey@1, o_orderkey@2] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([c_custkey@0], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..3391370], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:3391370..6782740], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6782740..6784675, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..3389435], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:3389435..6780803]]}, projection=[c_custkey, c_nationkey] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_0.parquet:0..3472684, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_1.parquet:0..3399474], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_1.parquet:3399474..3842490, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_2.parquet:0..3498085, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_3.parquet:0..2931057], 
[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_3.parquet:2931057..3110748, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..6692467], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6692467..6784093, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..6780531]]}, projection=[c_custkey, c_nationkey] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([o_custkey@1], 4), input_partitions=4 ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: o_orderdate@2 >= 1994-01-01 AND o_orderdate@2 < 1995-01-01 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1994-01-01 AND o_orderdate@4 < 1995-01-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1994-01-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1995-01-01 END, required_guarantees=[] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_0.parquet:0..13941205, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:0..13831262], 
[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:13831262..13895674, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_2.parquet:0..14344446, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:0..13363609], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:13363609..14369581, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..26766495], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:26766495..27275418, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..27263542]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1994-01-01 AND o_orderdate@4 < 1995-01-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1994-01-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1995-01-01 END, required_guarantees=[] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount] + ParquetExec: file_groups={4 groups: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount] RaySQL Plan =========== -Query Stage #0 (1 -> 4): +Query Stage #0 (2 -> 4): ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "r_regionkey", index: 0 }], 4)) ProjectionExec: expr=[r_regionkey@0 as r_regionkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: r_name@1 = AFRICA - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet/part1.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = AFRICA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= AFRICA AND AFRICA <= r_name_max@1 END, required_guarantees=[r_name in (AFRICA)] + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet/8NlZWnksIQjnYDPs_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet/part1.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = AFRICA, 
pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= AFRICA AND AFRICA <= r_name_max@1 END, required_guarantees=[r_name in (AFRICA)] -Query Stage #1 (1 -> 4): +Query Stage #1 (2 -> 4): ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name, n_regionkey] + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/IftthSxZMytggJzA_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name, n_regionkey] -Query Stage #2 (2 -> 4): +Query Stage #2 (4 -> 4): ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }, Column { name: "s_nationkey", index: 1 }], 4)) - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] Query Stage #3 (4 -> 4): ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..3391370], 
[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:3391370..6782740], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6782740..6784675, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..3389435], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:3389435..6780803]]}, projection=[c_custkey, c_nationkey] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_0.parquet:0..3472684, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_1.parquet:0..3399474], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_1.parquet:3399474..3842490, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_2.parquet:0..3498085, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_3.parquet:0..2931057], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_3.parquet:2931057..3110748, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..6692467], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6692467..6784093, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..6780531]]}, projection=[c_custkey, c_nationkey] Query Stage #4 (4 -> 4): ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "o_custkey", index: 1 }], 4)) ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: o_orderdate@2 >= 1994-01-01 AND o_orderdate@2 < 1995-01-01 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], 
[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1994-01-01 AND o_orderdate@4 < 1995-01-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1994-01-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1995-01-01 END, required_guarantees=[] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_0.parquet:0..13941205, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:0..13831262], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:13831262..13895674, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_2.parquet:0..14344446, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:0..13363609], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:13363609..14369581, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..26766495], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:26766495..27275418, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..27263542]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1994-01-01 AND o_orderdate@4 < 1995-01-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false 
ELSE o_orderdate_max@0 >= 1994-01-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1995-01-01 END, required_guarantees=[] Query Stage #5 (4 -> 4): ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "o_orderkey", index: 1 }], 4)) @@ -122,7 +121,7 @@ ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "o_orderk Query Stage #6 (4 -> 4): ShuffleWriterExec(stage_id=6, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], 
[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount] Query Stage #7 (4 -> 4): ShuffleWriterExec(stage_id=7, output_partitioning=Hash([Column { name: "l_suppkey", index: 1 }, Column { name: "c_nationkey", index: 0 }], 4)) diff --git a/testdata/expected-plans/q6.txt b/testdata/expected-plans/q6.txt index d04ef4f..9d62863 100644 --- a/testdata/expected-plans/q6.txt +++ b/testdata/expected-plans/q6.txt @@ -17,7 +17,7 @@ ProjectionExec: expr=[sum(lineitem.l_extendedprice * lineitem.l_discount)@0 as r ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_shipdate@3 >= 1994-01-01 AND l_shipdate@3 < 1995-01-01 AND l_discount@2 >= Some(3),11,2 AND l_discount@2 <= Some(5),11,2 AND l_quantity@0 < Some(2400),11,2 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_quantity, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1994-01-01 AND l_shipdate@10 < 1995-01-01 AND l_discount@6 >= Some(3),11,2 AND l_discount@6 <= Some(5),11,2 AND l_quantity@4 < Some(2400),11,2, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1994-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN 
false ELSE l_shipdate_min@3 < 1995-01-01 END AND CASE WHEN l_discount_null_count@5 = l_discount_row_count@6 THEN false ELSE l_discount_max@4 >= Some(3),11,2 END AND CASE WHEN l_discount_null_count@5 = l_discount_row_count@6 THEN false ELSE l_discount_min@7 <= Some(5),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@8 < Some(2400),11,2 END, required_guarantees=[] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_quantity, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1994-01-01 AND l_shipdate@10 < 1995-01-01 AND l_discount@6 >= Some(3),11,2 AND l_discount@6 <= Some(5),11,2 AND l_quantity@4 < Some(2400),11,2, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1994-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1995-01-01 END AND CASE WHEN l_discount_null_count@5 = l_discount_row_count@6 THEN false ELSE 
l_discount_max@4 >= Some(3),11,2 END AND CASE WHEN l_discount_null_count@5 = l_discount_row_count@6 THEN false ELSE l_discount_min@7 <= Some(5),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@8 < Some(2400),11,2 END, required_guarantees=[] RaySQL Plan =========== @@ -28,7 +28,7 @@ ShuffleWriterExec(stage_id=0, output_partitioning=UnknownPartitioning(4)) ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_shipdate@3 >= 1994-01-01 AND l_shipdate@3 < 1995-01-01 AND l_discount@2 >= Some(3),11,2 AND l_discount@2 <= Some(5),11,2 AND l_quantity@0 < Some(2400),11,2 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_quantity, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1994-01-01 AND l_shipdate@10 < 1995-01-01 AND l_discount@6 >= Some(3),11,2 AND l_discount@6 <= Some(5),11,2 AND l_quantity@4 < Some(2400),11,2, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1994-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1995-01-01 END AND CASE WHEN l_discount_null_count@5 = l_discount_row_count@6 THEN false ELSE l_discount_max@4 >= Some(3),11,2 END AND CASE WHEN l_discount_null_count@5 = l_discount_row_count@6 THEN false ELSE l_discount_min@7 <= Some(5),11,2 END AND CASE WHEN 
l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@8 < Some(2400),11,2 END, required_guarantees=[] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_quantity, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1994-01-01 AND l_shipdate@10 < 1995-01-01 AND l_discount@6 >= Some(3),11,2 AND l_discount@6 <= Some(5),11,2 AND l_quantity@4 < Some(2400),11,2, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1994-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1995-01-01 END AND CASE WHEN l_discount_null_count@5 = l_discount_row_count@6 THEN false ELSE l_discount_max@4 >= Some(3),11,2 END AND CASE WHEN l_discount_null_count@5 = l_discount_row_count@6 THEN false ELSE l_discount_min@7 <= Some(5),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@8 < Some(2400),11,2 END, required_guarantees=[] 
Query Stage #1 (1 -> 1): ProjectionExec: expr=[sum(lineitem.l_extendedprice * lineitem.l_discount)@0 as revenue] diff --git a/testdata/expected-plans/q7.txt b/testdata/expected-plans/q7.txt index cc5671c..ed4009c 100644 --- a/testdata/expected-plans/q7.txt +++ b/testdata/expected-plans/q7.txt @@ -42,10 +42,10 @@ SortPreservingMergeExec: [supp_nation@0 ASC NULLS LAST,cust_nation@1 ASC NULLS L HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, c_nationkey@3)], filter=n_name@0 = GERMANY AND n_name@1 = IRAQ OR n_name@0 = IRAQ AND n_name@1 = GERMANY, projection=[n_name@1, l_extendedprice@2, l_discount@3, l_shipdate@4, n_name@6] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=4 - RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 CoalesceBatchesExec: target_batch_size=8192 FilterExec: n_name@1 = IRAQ OR n_name@1 = GERMANY - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = IRAQ OR n_name@1 = GERMANY, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= IRAQ AND IRAQ <= n_name_max@1 END OR CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= GERMANY AND GERMANY <= n_name_max@1 END, required_guarantees=[n_name in (GERMANY, IRAQ)] + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/IftthSxZMytggJzA_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = IRAQ OR n_name@1 = GERMANY, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= IRAQ AND IRAQ <= n_name_max@1 END OR CASE WHEN n_name_null_count@2 = 
n_name_row_count@3 THEN false ELSE n_name_min@0 <= GERMANY AND GERMANY <= n_name_max@1 END, required_guarantees=[n_name in (GERMANY, IRAQ)] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([c_nationkey@3], 4), input_partitions=4 ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, l_shipdate@3 as l_shipdate, c_nationkey@4 as c_nationkey, n_name@0 as n_name] @@ -53,10 +53,10 @@ SortPreservingMergeExec: [supp_nation@0 ASC NULLS LAST,cust_nation@1 ASC NULLS L HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@0)], projection=[n_name@1, l_extendedprice@3, l_discount@4, l_shipdate@5, c_nationkey@6] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=4 - RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 CoalesceBatchesExec: target_batch_size=8192 FilterExec: n_name@1 = GERMANY OR n_name@1 = IRAQ - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = GERMANY OR n_name@1 = IRAQ, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= GERMANY AND GERMANY <= n_name_max@1 END OR CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= IRAQ AND IRAQ <= n_name_max@1 END, required_guarantees=[n_name in (IRAQ, GERMANY)] + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/IftthSxZMytggJzA_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = GERMANY OR n_name@1 = IRAQ, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= GERMANY AND GERMANY <= 
n_name_max@1 END OR CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= IRAQ AND IRAQ <= n_name_max@1 END, required_guarantees=[n_name in (GERMANY, IRAQ)] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([s_nationkey@0], 4), input_partitions=4 ProjectionExec: expr=[s_nationkey@1 as s_nationkey, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, l_shipdate@4 as l_shipdate, c_nationkey@0 as c_nationkey] @@ -64,78 +64,79 @@ SortPreservingMergeExec: [supp_nation@0 ASC NULLS LAST,cust_nation@1 ASC NULLS L HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@4)], projection=[c_nationkey@1, s_nationkey@2, l_extendedprice@3, l_discount@4, l_shipdate@5] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([c_custkey@0], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..3391370], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:3391370..6782740], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6782740..6784675, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..3389435], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:3389435..6780803]]}, projection=[c_custkey, c_nationkey] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_0.parquet:0..3472684, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_1.parquet:0..3399474], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_1.parquet:3399474..3842490, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_2.parquet:0..3498085, 
home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_3.parquet:0..2931057], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_3.parquet:2931057..3110748, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..6692467], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6692467..6784093, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..6780531]]}, projection=[c_custkey, c_nationkey] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([o_custkey@4], 4), input_partitions=4 - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(l_orderkey@1, o_orderkey@0)], projection=[s_nationkey@0, l_extendedprice@2, l_discount@3, l_shipdate@4, o_custkey@6] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([l_orderkey@1], 4), input_partitions=4 - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1)], projection=[s_nationkey@1, l_orderkey@2, l_extendedprice@4, l_discount@5, l_shipdate@6] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=4 - RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([l_suppkey@1], 4), input_partitions=4 - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_shipdate@4 >= 1995-01-01 AND l_shipdate@4 <= 1996-12-31 - ParquetExec: file_groups={4 groups: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1995-01-01 AND l_shipdate@10 <= 1996-12-31, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1995-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 <= 1996-12-31 END, required_guarantees=[] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([o_orderkey@0], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_custkey] + ProjectionExec: expr=[s_nationkey@1 as s_nationkey, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, l_shipdate@4 as l_shipdate, o_custkey@0 as o_custkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@0, l_orderkey@1)], projection=[o_custkey@1, s_nationkey@2, l_extendedprice@4, l_discount@5, 
l_shipdate@6] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([o_orderkey@0], 4), input_partitions=4 + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_0.parquet:0..13941205, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:0..13831262], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:13831262..13895674, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_2.parquet:0..14344446, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:0..13363609], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:13363609..14369581, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..26766495], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:26766495..27275418, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..27263542]]}, projection=[o_orderkey, o_custkey] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([l_orderkey@1], 4), input_partitions=4 + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1)], projection=[s_nationkey@1, l_orderkey@2, l_extendedprice@4, l_discount@5, l_shipdate@6] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=4 + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], 
[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([l_suppkey@1], 4), input_partitions=4 + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: l_shipdate@4 >= 1995-01-01 AND l_shipdate@4 <= 1996-12-31 + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1995-01-01 AND l_shipdate@10 <= 1996-12-31, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1995-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 <= 1996-12-31 END, required_guarantees=[] RaySQL Plan =========== -Query Stage #0 (1 -> 4): +Query Stage #0 (2 -> 4): ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) CoalesceBatchesExec: 
target_batch_size=8192 FilterExec: n_name@1 = IRAQ OR n_name@1 = GERMANY - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = IRAQ OR n_name@1 = GERMANY, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= IRAQ AND IRAQ <= n_name_max@1 END OR CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= GERMANY AND GERMANY <= n_name_max@1 END, required_guarantees=[n_name in (GERMANY, IRAQ)] + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/IftthSxZMytggJzA_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = IRAQ OR n_name@1 = GERMANY, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= IRAQ AND IRAQ <= n_name_max@1 END OR CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= GERMANY AND GERMANY <= n_name_max@1 END, required_guarantees=[n_name in (GERMANY, IRAQ)] -Query Stage #1 (1 -> 4): +Query Stage #1 (2 -> 4): ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) CoalesceBatchesExec: target_batch_size=8192 FilterExec: n_name@1 = GERMANY OR n_name@1 = IRAQ - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = GERMANY OR n_name@1 = IRAQ, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= GERMANY AND GERMANY <= n_name_max@1 END OR CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= IRAQ AND IRAQ <= n_name_max@1 END, required_guarantees=[n_name in (IRAQ, GERMANY)] + ParquetExec: 
file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/IftthSxZMytggJzA_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = GERMANY OR n_name@1 = IRAQ, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= GERMANY AND GERMANY <= n_name_max@1 END OR CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= IRAQ AND IRAQ <= n_name_max@1 END, required_guarantees=[n_name in (GERMANY, IRAQ)] Query Stage #2 (4 -> 4): ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..3391370], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:3391370..6782740], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6782740..6784675, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..3389435], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:3389435..6780803]]}, projection=[c_custkey, c_nationkey] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_0.parquet:0..3472684, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_1.parquet:0..3399474], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_1.parquet:3399474..3842490, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_2.parquet:0..3498085, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_3.parquet:0..2931057], 
[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_3.parquet:2931057..3110748, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..6692467], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6692467..6784093, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..6780531]]}, projection=[c_custkey, c_nationkey] -Query Stage #3 (2 -> 4): -ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] +Query Stage #3 (4 -> 4): +ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_0.parquet:0..13941205, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:0..13831262], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:13831262..13895674, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_2.parquet:0..14344446, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:0..13363609], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:13363609..14369581, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..26766495], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:26766495..27275418, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..27263542]]}, projection=[o_orderkey, o_custkey] Query Stage #4 (4 
-> 4): -ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "l_suppkey", index: 1 }], 4)) +ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] + +Query Stage #5 (4 -> 4): +ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "l_suppkey", index: 1 }], 4)) CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_shipdate@4 >= 1995-01-01 AND l_shipdate@4 <= 1996-12-31 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1995-01-01 AND l_shipdate@10 <= 1996-12-31, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1995-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 <= 1996-12-31 END, required_guarantees=[] + ParquetExec: file_groups={4 groups: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1995-01-01 AND l_shipdate@10 <= 1996-12-31, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1995-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 <= 1996-12-31 END, required_guarantees=[] -Query Stage #5 (4 -> 4): -ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "l_orderkey", index: 1 }], 4)) +Query Stage #6 (4 -> 4): +ShuffleWriterExec(stage_id=6, output_partitioning=Hash([Column { name: "l_orderkey", index: 1 }], 4)) CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1)], projection=[s_nationkey@1, l_orderkey@2, l_extendedprice@4, l_discount@5, l_shipdate@6] CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "s_suppkey", index: 
0 }], 4)) + ShuffleReaderExec(stage_id=4, input_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=4, input_partitioning=Hash([Column { name: "l_suppkey", index: 1 }], 4)) - -Query Stage #6 (4 -> 4): -ShuffleWriterExec(stage_id=6, output_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_custkey] + ShuffleReaderExec(stage_id=5, input_partitioning=Hash([Column { name: "l_suppkey", index: 1 }], 4)) Query Stage #7 (4 -> 4): ShuffleWriterExec(stage_id=7, output_partitioning=Hash([Column { name: "o_custkey", index: 4 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(l_orderkey@1, o_orderkey@0)], projection=[s_nationkey@0, l_extendedprice@2, l_discount@3, l_shipdate@4, o_custkey@6] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=5, input_partitioning=Hash([Column { name: "l_orderkey", index: 1 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=6, input_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) + ProjectionExec: expr=[s_nationkey@1 as s_nationkey, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, l_shipdate@4 as l_shipdate, o_custkey@0 as o_custkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@0, 
l_orderkey@1)], projection=[o_custkey@1, s_nationkey@2, l_extendedprice@4, l_discount@5, l_shipdate@6] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=6, input_partitioning=Hash([Column { name: "l_orderkey", index: 1 }], 4)) Query Stage #8 (4 -> 4): ShuffleWriterExec(stage_id=8, output_partitioning=Hash([Column { name: "s_nationkey", index: 0 }], 4)) diff --git a/testdata/expected-plans/q8.txt b/testdata/expected-plans/q8.txt index f1bcc00..42479de 100644 --- a/testdata/expected-plans/q8.txt +++ b/testdata/expected-plans/q8.txt @@ -50,27 +50,27 @@ SortPreservingMergeExec: [o_year@0 ASC NULLS LAST] HashJoinExec: mode=Partitioned, join_type=Inner, on=[(r_regionkey@0, n_regionkey@3)], projection=[l_extendedprice@1, l_discount@2, o_orderdate@3, n_name@5] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([r_regionkey@0], 4), input_partitions=4 - RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 + RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 ProjectionExec: expr=[r_regionkey@0 as r_regionkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: r_name@1 = MIDDLE EAST - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet/part1.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = MIDDLE EAST, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= MIDDLE EAST AND MIDDLE EAST <= r_name_max@1 END, required_guarantees=[r_name in (MIDDLE EAST)] + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet/8NlZWnksIQjnYDPs_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet/part1.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = 
MIDDLE EAST, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= MIDDLE EAST AND MIDDLE EAST <= r_name_max@1 END, required_guarantees=[r_name in (MIDDLE EAST)] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([n_regionkey@3], 4), input_partitions=4 ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, o_orderdate@3 as o_orderdate, n_regionkey@4 as n_regionkey, n_name@0 as n_name] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[n_name@1, l_extendedprice@2, l_discount@3, o_orderdate@5, n_regionkey@6] CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=1 - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name] + RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=2 + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/IftthSxZMytggJzA_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([s_nationkey@2], 4), input_partitions=4 ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@3 as s_nationkey, o_orderdate@4 as o_orderdate, n_regionkey@0 as n_regionkey] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, c_nationkey@4)], projection=[n_regionkey@1, l_extendedprice@2, l_discount@3, s_nationkey@4, o_orderdate@5] CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=1 - ParquetExec: file_groups={1 group: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_regionkey] + RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=2 + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/IftthSxZMytggJzA_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_regionkey] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([c_nationkey@4], 4), input_partitions=4 ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@3 as s_nationkey, o_orderdate@4 as o_orderdate, c_nationkey@0 as c_nationkey] @@ -78,7 +78,7 @@ SortPreservingMergeExec: [o_year@0 ASC NULLS LAST] HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@3)], projection=[c_nationkey@1, l_extendedprice@2, l_discount@3, s_nationkey@4, o_orderdate@6] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([c_custkey@0], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..3391370], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:3391370..6782740], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6782740..6784675, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..3389435], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:3389435..6780803]]}, projection=[c_custkey, c_nationkey] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_0.parquet:0..3472684, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_1.parquet:0..3399474], 
[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_1.parquet:3399474..3842490, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_2.parquet:0..3498085, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_3.parquet:0..2931057], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_3.parquet:2931057..3110748, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..6692467], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6692467..6784093, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..6780531]]}, projection=[c_custkey, c_nationkey] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([o_custkey@3], 4), input_partitions=4 ProjectionExec: expr=[l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@4 as s_nationkey, o_custkey@0 as o_custkey, o_orderdate@1 as o_orderdate] @@ -88,7 +88,7 @@ SortPreservingMergeExec: [o_year@0 ASC NULLS LAST] RepartitionExec: partitioning=Hash([o_orderkey@0], 4), input_partitions=4 CoalesceBatchesExec: target_batch_size=8192 FilterExec: o_orderdate@2 >= 1995-01-01 AND o_orderdate@2 <= 1996-12-31 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1995-01-01 AND o_orderdate@4 <= 1996-12-31, 
pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1995-01-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 <= 1996-12-31 END, required_guarantees=[] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_0.parquet:0..13941205, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:0..13831262], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:13831262..13895674, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_2.parquet:0..14344446, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:0..13363609], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:13363609..14369581, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..26766495], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:26766495..27275418, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..27263542]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1995-01-01 AND o_orderdate@4 <= 1996-12-31, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1995-01-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 <= 1996-12-31 END, required_guarantees=[] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4 ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@0 as s_nationkey] @@ -96,8 +96,7 @@ SortPreservingMergeExec: [o_year@0 ASC NULLS LAST] 
HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1)], projection=[s_nationkey@1, l_orderkey@2, l_extendedprice@4, l_discount@5] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=4 - RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([l_suppkey@1], 4), input_partitions=4 CoalesceBatchesExec: target_batch_size=8192 @@ -107,54 +106,53 @@ SortPreservingMergeExec: [o_year@0 ASC NULLS LAST] ProjectionExec: expr=[p_partkey@0 as p_partkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: p_type@1 = LARGE PLATED STEEL - RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_type], predicate=p_type@4 = LARGE PLATED STEEL, pruning_predicate=CASE WHEN p_type_null_count@2 = p_type_row_count@3 THEN false ELSE p_type_min@0 <= LARGE PLATED STEEL AND LARGE PLATED STEEL <= p_type_max@1 END, required_guarantees=[p_type in (LARGE PLATED STEEL)] + ParquetExec: 
file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet:0..3454243, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet:0..61547], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet:61547..3452413, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_0.parquet:0..124924], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_0.parquet:124924..2056321, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_1.parquet:0..1526046, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_2.parquet:0..58347], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_2.parquet:58347..1788444, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_3.parquet:0..1785690]]}, projection=[p_partkey, p_type], predicate=p_type@4 = LARGE PLATED STEEL, pruning_predicate=CASE WHEN p_type_null_count@2 = p_type_row_count@3 THEN false ELSE p_type_min@0 <= LARGE PLATED STEEL AND LARGE PLATED STEEL <= p_type_max@1 END, required_guarantees=[p_type in (LARGE PLATED STEEL)] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([l_partkey@1], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_partkey, l_suppkey, l_extendedprice, l_discount] + ParquetExec: file_groups={4 
groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_orderkey, l_partkey, l_suppkey, l_extendedprice, l_discount] RaySQL Plan =========== -Query Stage #0 (1 -> 4): +Query Stage #0 (2 -> 4): ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "r_regionkey", index: 0 }], 4)) ProjectionExec: expr=[r_regionkey@0 as r_regionkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: r_name@1 = MIDDLE EAST - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet/part1.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = MIDDLE EAST, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= MIDDLE EAST AND MIDDLE EAST <= r_name_max@1 END, required_guarantees=[r_name in (MIDDLE EAST)] + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet/8NlZWnksIQjnYDPs_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet/part1.parquet]]}, 
projection=[r_regionkey, r_name], predicate=r_name@1 = MIDDLE EAST, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= MIDDLE EAST AND MIDDLE EAST <= r_name_max@1 END, required_guarantees=[r_name in (MIDDLE EAST)] -Query Stage #1 (1 -> 4): +Query Stage #1 (2 -> 4): ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name] + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/IftthSxZMytggJzA_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name] -Query Stage #2 (1 -> 4): +Query Stage #2 (2 -> 4): ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_regionkey] + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/IftthSxZMytggJzA_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_regionkey] Query Stage #3 (4 -> 4): ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..3391370], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:3391370..6782740], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6782740..6784675, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..3389435], 
[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:3389435..6780803]]}, projection=[c_custkey, c_nationkey] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_0.parquet:0..3472684, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_1.parquet:0..3399474], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_1.parquet:3399474..3842490, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_2.parquet:0..3498085, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_3.parquet:0..2931057], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_3.parquet:2931057..3110748, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..6692467], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6692467..6784093, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..6780531]]}, projection=[c_custkey, c_nationkey] Query Stage #4 (4 -> 4): ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) CoalesceBatchesExec: target_batch_size=8192 FilterExec: o_orderdate@2 >= 1995-01-01 AND o_orderdate@2 <= 1996-12-31 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_custkey, 
o_orderdate], predicate=o_orderdate@4 >= 1995-01-01 AND o_orderdate@4 <= 1996-12-31, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1995-01-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 <= 1996-12-31 END, required_guarantees=[] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_0.parquet:0..13941205, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:0..13831262], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:13831262..13895674, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_2.parquet:0..14344446, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:0..13363609], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:13363609..14369581, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..26766495], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:26766495..27275418, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..27263542]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1995-01-01 AND o_orderdate@4 <= 1996-12-31, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1995-01-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 <= 1996-12-31 END, required_guarantees=[] -Query Stage #5 (2 -> 4): +Query Stage #5 (4 -> 4): ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) - ParquetExec: file_groups={2 groups: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] -Query Stage #6 (2 -> 4): +Query Stage #6 (4 -> 4): ShuffleWriterExec(stage_id=6, output_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) ProjectionExec: expr=[p_partkey@0 as p_partkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: p_type@1 = LARGE PLATED STEEL - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_type], predicate=p_type@4 = LARGE PLATED STEEL, pruning_predicate=CASE WHEN p_type_null_count@2 = p_type_row_count@3 THEN false ELSE p_type_min@0 <= LARGE PLATED STEEL AND LARGE PLATED STEEL <= p_type_max@1 END, required_guarantees=[p_type in (LARGE PLATED STEEL)] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet:0..3454243, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet:0..61547], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet:61547..3452413, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_0.parquet:0..124924], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_0.parquet:124924..2056321, 
home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_1.parquet:0..1526046, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_2.parquet:0..58347], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_2.parquet:58347..1788444, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_3.parquet:0..1785690]]}, projection=[p_partkey, p_type], predicate=p_type@4 = LARGE PLATED STEEL, pruning_predicate=CASE WHEN p_type_null_count@2 = p_type_row_count@3 THEN false ELSE p_type_min@0 <= LARGE PLATED STEEL AND LARGE PLATED STEEL <= p_type_max@1 END, required_guarantees=[p_type in (LARGE PLATED STEEL)] Query Stage #7 (4 -> 4): ShuffleWriterExec(stage_id=7, output_partitioning=Hash([Column { name: "l_partkey", index: 1 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_partkey, l_suppkey, l_extendedprice, l_discount] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, 
home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_orderkey, l_partkey, l_suppkey, l_extendedprice, l_discount] Query Stage #8 (4 -> 4): ShuffleWriterExec(stage_id=8, output_partitioning=Hash([Column { name: "l_suppkey", index: 1 }], 4)) diff --git a/testdata/expected-plans/q9.txt b/testdata/expected-plans/q9.txt index 7537293..0a14b3a 100644 --- a/testdata/expected-plans/q9.txt +++ b/testdata/expected-plans/q9.txt @@ -38,8 +38,8 @@ SortPreservingMergeExec: [nation@0 ASC NULLS LAST,o_year@1 DESC] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@3)], projection=[n_name@1, l_quantity@2, l_extendedprice@3, l_discount@4, ps_supplycost@6, o_orderdate@7] CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=1 - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name] + RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=2 + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/IftthSxZMytggJzA_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([s_nationkey@3], 4), input_partitions=4 ProjectionExec: expr=[l_quantity@1 as 
l_quantity, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@4 as s_nationkey, ps_supplycost@5 as ps_supplycost, o_orderdate@0 as o_orderdate] @@ -47,7 +47,7 @@ SortPreservingMergeExec: [nation@0 ASC NULLS LAST,o_year@1 DESC] HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_orderdate@1, l_quantity@3, l_extendedprice@4, l_discount@5, s_nationkey@6, ps_supplycost@7] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([o_orderkey@0], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_orderdate] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_0.parquet:0..13941205, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:0..13831262], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:13831262..13895674, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_2.parquet:0..14344446, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:0..13363609], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:13363609..14369581, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..26766495], 
[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:26766495..27275418, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..27263542]]}, projection=[o_orderkey, o_orderdate] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4 ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_quantity@2 as l_quantity, l_extendedprice@3 as l_extendedprice, l_discount@4 as l_discount, s_nationkey@5 as s_nationkey, ps_supplycost@0 as ps_supplycost] @@ -55,7 +55,7 @@ SortPreservingMergeExec: [nation@0 ASC NULLS LAST,o_year@1 DESC] HashJoinExec: mode=Partitioned, join_type=Inner, on=[(ps_suppkey@1, l_suppkey@2), (ps_partkey@0, l_partkey@1)], projection=[ps_supplycost@2, l_orderkey@3, l_quantity@6, l_extendedprice@7, l_discount@8, s_nationkey@9] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([ps_suppkey@1, ps_partkey@0], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10449977], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:10449977..20896603, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..3351], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:3351..10453328], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:10453328..20903305]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_0.parquet:0..11328263, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_1.parquet:0..9740046], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_1.parquet:9740046..10585781, 
home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_2.parquet:0..9946761, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_3.parquet:0..10275813], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_3.parquet:10275813..10620507, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..20723615], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:20723615..20895812, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..20896109]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([l_suppkey@2, l_partkey@1], 4), input_partitions=4 ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_partkey@2 as l_partkey, l_suppkey@3 as l_suppkey, l_quantity@4 as l_quantity, l_extendedprice@5 as l_extendedprice, l_discount@6 as l_discount, s_nationkey@0 as s_nationkey] @@ -63,8 +63,7 @@ SortPreservingMergeExec: [nation@0 ASC NULLS LAST,o_year@1 DESC] HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, l_suppkey@2)], projection=[s_nationkey@1, l_orderkey@2, l_partkey@3, l_suppkey@4, l_quantity@5, l_extendedprice@6, l_discount@7] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=4 - RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_1.parquet], 
[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] CoalesceBatchesExec: target_batch_size=8192 RepartitionExec: partitioning=Hash([l_suppkey@2], 4), input_partitions=4 CoalesceBatchesExec: target_batch_size=8192 @@ -74,41 +73,40 @@ SortPreservingMergeExec: [nation@0 ASC NULLS LAST,o_year@1 DESC] ProjectionExec: expr=[p_partkey@0 as p_partkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: p_name@1 LIKE %moccasin% - RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_name], predicate=p_name@1 LIKE %moccasin% + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet:0..3454243, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet:0..61547], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet:61547..3452413, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_0.parquet:0..124924], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_0.parquet:124924..2056321, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_1.parquet:0..1526046, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_2.parquet:0..58347], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_2.parquet:58347..1788444, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_3.parquet:0..1785690]]}, projection=[p_partkey, p_name], predicate=p_name@1 LIKE %moccasin% CoalesceBatchesExec: target_batch_size=8192 
RepartitionExec: partitioning=Hash([l_partkey@1], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_partkey, l_suppkey, l_quantity, l_extendedprice, l_discount] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_orderkey, l_partkey, l_suppkey, l_quantity, l_extendedprice, l_discount] RaySQL Plan =========== -Query Stage #0 (1 -> 4): +Query Stage #0 (2 -> 4): ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) - 
ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name] + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/IftthSxZMytggJzA_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name] Query Stage #1 (4 -> 4): ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13635744], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13635744..27271488], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27271488..27277388, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13629844], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13629844..27265588]]}, projection=[o_orderkey, o_orderdate] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_0.parquet:0..13941205, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:0..13831262], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:13831262..13895674, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_2.parquet:0..14344446, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:0..13363609], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:13363609..14369581, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..26766495], 
[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:26766495..27275418, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..27263542]]}, projection=[o_orderkey, o_orderdate] Query Stage #2 (4 -> 4): ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "ps_suppkey", index: 1 }, Column { name: "ps_partkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10449977], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:10449977..20896603, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..3351], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:3351..10453328], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:10453328..20903305]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_0.parquet:0..11328263, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_1.parquet:0..9740046], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_1.parquet:9740046..10585781, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_2.parquet:0..9946761, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_3.parquet:0..10275813], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_3.parquet:10275813..10620507, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..20723615], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:20723615..20895812, 
home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..20896109]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] -Query Stage #3 (2 -> 4): +Query Stage #3 (4 -> 4): ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] -Query Stage #4 (2 -> 4): +Query Stage #4 (4 -> 4): ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) ProjectionExec: expr=[p_partkey@0 as p_partkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: p_name@1 LIKE %moccasin% - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_name], predicate=p_name@1 LIKE %moccasin% + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet:0..3454243, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet:0..61547], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet:61547..3452413, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_0.parquet:0..124924], 
[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_0.parquet:124924..2056321, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_1.parquet:0..1526046, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_2.parquet:0..58347], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_2.parquet:58347..1788444, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_3.parquet:0..1785690]]}, projection=[p_partkey, p_name], predicate=p_name@1 LIKE %moccasin% Query Stage #5 (4 -> 4): ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "l_partkey", index: 1 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52077273], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52077273..104126391, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..28155], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:28155..52105428], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52105428..104182701]]}, projection=[l_orderkey, l_partkey, l_suppkey, l_quantity, l_extendedprice, l_discount] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], 
[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_orderkey, l_partkey, l_suppkey, l_quantity, l_extendedprice, l_discount] Query Stage #6 (4 -> 4): ShuffleWriterExec(stage_id=6, output_partitioning=Hash([Column { name: "l_suppkey", index: 2 }], 4)) From 2f2eda1bd6b4731c7313c6c4291301d0349a016b Mon Sep 17 00:00:00 2001 From: Edmondo Porcu Date: Thu, 3 Oct 2024 14:51:10 -0400 Subject: [PATCH 04/14] Removing plans, reverting to single partition --- testdata/expected-plans/q1.txt | 48 ------ testdata/expected-plans/q10.txt | 130 ---------------- testdata/expected-plans/q11.txt | 175 --------------------- testdata/expected-plans/q12.txt | 73 --------- testdata/expected-plans/q13.txt | 78 ---------- testdata/expected-plans/q14.txt | 63 -------- testdata/expected-plans/q16.txt | 113 -------------- testdata/expected-plans/q17.txt | 88 ----------- testdata/expected-plans/q18.txt | 115 -------------- testdata/expected-plans/q19.txt | 66 -------- testdata/expected-plans/q2.txt | 264 -------------------------------- testdata/expected-plans/q20.txt | 152 ------------------ testdata/expected-plans/q21.txt | 188 ----------------------- testdata/expected-plans/q22.txt | 99 ------------ testdata/expected-plans/q3.txt | 110 ------------- testdata/expected-plans/q4.txt | 80 ---------- testdata/expected-plans/q5.txt | 176 --------------------- testdata/expected-plans/q6.txt | 38 ----- testdata/expected-plans/q7.txt | 183 ---------------------- testdata/expected-plans/q8.txt | 238 ---------------------------- testdata/expected-plans/q9.txt | 172 --------------------- 21 files changed, 2649 deletions(-) 
delete mode 100644 testdata/expected-plans/q1.txt delete mode 100644 testdata/expected-plans/q10.txt delete mode 100644 testdata/expected-plans/q11.txt delete mode 100644 testdata/expected-plans/q12.txt delete mode 100644 testdata/expected-plans/q13.txt delete mode 100644 testdata/expected-plans/q14.txt delete mode 100644 testdata/expected-plans/q16.txt delete mode 100644 testdata/expected-plans/q17.txt delete mode 100644 testdata/expected-plans/q18.txt delete mode 100644 testdata/expected-plans/q19.txt delete mode 100644 testdata/expected-plans/q2.txt delete mode 100644 testdata/expected-plans/q20.txt delete mode 100644 testdata/expected-plans/q21.txt delete mode 100644 testdata/expected-plans/q22.txt delete mode 100644 testdata/expected-plans/q3.txt delete mode 100644 testdata/expected-plans/q4.txt delete mode 100644 testdata/expected-plans/q5.txt delete mode 100644 testdata/expected-plans/q6.txt delete mode 100644 testdata/expected-plans/q7.txt delete mode 100644 testdata/expected-plans/q8.txt delete mode 100644 testdata/expected-plans/q9.txt diff --git a/testdata/expected-plans/q1.txt b/testdata/expected-plans/q1.txt deleted file mode 100644 index 27bea61..0000000 --- a/testdata/expected-plans/q1.txt +++ /dev/null @@ -1,48 +0,0 @@ -DataFusion Logical Plan -======================= - -Sort: lineitem.l_returnflag ASC NULLS LAST, lineitem.l_linestatus ASC NULLS LAST - Projection: lineitem.l_returnflag, lineitem.l_linestatus, sum(lineitem.l_quantity) AS sum_qty, sum(lineitem.l_extendedprice) AS sum_base_price, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS sum_disc_price, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax) AS sum_charge, avg(lineitem.l_quantity) AS avg_qty, avg(lineitem.l_extendedprice) AS avg_price, avg(lineitem.l_discount) AS avg_disc, count(*) AS count_order - Aggregate: groupBy=[[lineitem.l_returnflag, lineitem.l_linestatus]], aggr=[[sum(lineitem.l_quantity), 
sum(lineitem.l_extendedprice), sum(__common_expr_1) AS sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount), sum(__common_expr_1 * (Decimal128(Some(1),20,0) + lineitem.l_tax)) AS sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax), avg(lineitem.l_quantity), avg(lineitem.l_extendedprice), avg(lineitem.l_discount), count(Int64(1)) AS count(*)]] - Projection: lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - lineitem.l_discount) AS __common_expr_1, lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_tax, lineitem.l_returnflag, lineitem.l_linestatus - Filter: lineitem.l_shipdate <= Date32("1998-09-24") - TableScan: lineitem projection=[l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate], partial_filters=[lineitem.l_shipdate <= Date32("1998-09-24")] - -DataFusion Physical Plan -======================== - -SortPreservingMergeExec: [l_returnflag@0 ASC NULLS LAST,l_linestatus@1 ASC NULLS LAST] - SortExec: expr=[l_returnflag@0 ASC NULLS LAST,l_linestatus@1 ASC NULLS LAST], preserve_partitioning=[true] - ProjectionExec: expr=[l_returnflag@0 as l_returnflag, l_linestatus@1 as l_linestatus, sum(lineitem.l_quantity)@2 as sum_qty, sum(lineitem.l_extendedprice)@3 as sum_base_price, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@4 as sum_disc_price, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax)@5 as sum_charge, avg(lineitem.l_quantity)@6 as avg_qty, avg(lineitem.l_extendedprice)@7 as avg_price, avg(lineitem.l_discount)@8 as avg_disc, count(*)@9 as count_order] - AggregateExec: mode=FinalPartitioned, gby=[l_returnflag@0 as l_returnflag, l_linestatus@1 as l_linestatus], aggr=[sum(lineitem.l_quantity), sum(lineitem.l_extendedprice), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax), 
avg(lineitem.l_quantity), avg(lineitem.l_extendedprice), avg(lineitem.l_discount), count(*)] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([l_returnflag@0, l_linestatus@1], 4), input_partitions=4 - AggregateExec: mode=Partial, gby=[l_returnflag@5 as l_returnflag, l_linestatus@6 as l_linestatus], aggr=[sum(lineitem.l_quantity), sum(lineitem.l_extendedprice), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax), avg(lineitem.l_quantity), avg(lineitem.l_extendedprice), avg(lineitem.l_discount), count(*)] - ProjectionExec: expr=[l_extendedprice@1 * (Some(1),20,0 - l_discount@2) as __common_expr_2, l_quantity@0 as l_quantity, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, l_tax@3 as l_tax, l_returnflag@4 as l_returnflag, l_linestatus@5 as l_linestatus] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_shipdate@6 <= 1998-09-24 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, 
home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate], predicate=l_shipdate@10 <= 1998-09-24, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@0 <= 1998-09-24 END, required_guarantees=[] - -RaySQL Plan -=========== - -Query Stage #0 (4 -> 4): -ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "l_returnflag", index: 0 }, Column { name: "l_linestatus", index: 1 }], 4)) - AggregateExec: mode=Partial, gby=[l_returnflag@5 as l_returnflag, l_linestatus@6 as l_linestatus], aggr=[sum(lineitem.l_quantity), sum(lineitem.l_extendedprice), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax), avg(lineitem.l_quantity), avg(lineitem.l_extendedprice), avg(lineitem.l_discount), count(*)] - ProjectionExec: expr=[l_extendedprice@1 * (Some(1),20,0 - l_discount@2) as __common_expr_2, l_quantity@0 as l_quantity, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, l_tax@3 as l_tax, l_returnflag@4 as l_returnflag, l_linestatus@5 as l_linestatus] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_shipdate@6 <= 1998-09-24 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], 
[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate], predicate=l_shipdate@10 <= 1998-09-24, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@0 <= 1998-09-24 END, required_guarantees=[] - -Query Stage #1 (4 -> 4): -ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "l_returnflag", index: 0 }, Column { name: "l_linestatus", index: 1 }], 4)) - SortExec: expr=[l_returnflag@0 ASC NULLS LAST,l_linestatus@1 ASC NULLS LAST], preserve_partitioning=[true] - ProjectionExec: expr=[l_returnflag@0 as l_returnflag, l_linestatus@1 as l_linestatus, sum(lineitem.l_quantity)@2 as sum_qty, sum(lineitem.l_extendedprice)@3 as sum_base_price, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@4 as sum_disc_price, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax)@5 as sum_charge, avg(lineitem.l_quantity)@6 as avg_qty, avg(lineitem.l_extendedprice)@7 as avg_price, avg(lineitem.l_discount)@8 as avg_disc, count(*)@9 as count_order] - AggregateExec: mode=FinalPartitioned, gby=[l_returnflag@0 as l_returnflag, l_linestatus@1 as l_linestatus], aggr=[sum(lineitem.l_quantity), sum(lineitem.l_extendedprice), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax), avg(lineitem.l_quantity), avg(lineitem.l_extendedprice), avg(lineitem.l_discount), count(*)] - CoalesceBatchesExec: target_batch_size=8192 - 
ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "l_returnflag", index: 0 }, Column { name: "l_linestatus", index: 1 }], 4)) - -Query Stage #2 (4 -> 1): -SortPreservingMergeExec: [l_returnflag@0 ASC NULLS LAST,l_linestatus@1 ASC NULLS LAST] - ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "l_returnflag", index: 0 }, Column { name: "l_linestatus", index: 1 }], 4)) - diff --git a/testdata/expected-plans/q10.txt b/testdata/expected-plans/q10.txt deleted file mode 100644 index 1d743e9..0000000 --- a/testdata/expected-plans/q10.txt +++ /dev/null @@ -1,130 +0,0 @@ -DataFusion Logical Plan -======================= - -Limit: skip=0, fetch=20 - Sort: revenue DESC NULLS FIRST, fetch=20 - Projection: customer.c_custkey, customer.c_name, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS revenue, customer.c_acctbal, nation.n_name, customer.c_address, customer.c_phone, customer.c_comment - Aggregate: groupBy=[[customer.c_custkey, customer.c_name, customer.c_acctbal, customer.c_phone, nation.n_name, customer.c_address, customer.c_comment]], aggr=[[sum(lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - lineitem.l_discount)) AS sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]] - Projection: customer.c_custkey, customer.c_name, customer.c_address, customer.c_phone, customer.c_acctbal, customer.c_comment, lineitem.l_extendedprice, lineitem.l_discount, nation.n_name - Inner Join: customer.c_nationkey = nation.n_nationkey - Projection: customer.c_custkey, customer.c_name, customer.c_address, customer.c_nationkey, customer.c_phone, customer.c_acctbal, customer.c_comment, lineitem.l_extendedprice, lineitem.l_discount - Inner Join: orders.o_orderkey = lineitem.l_orderkey - Projection: customer.c_custkey, customer.c_name, customer.c_address, customer.c_nationkey, customer.c_phone, customer.c_acctbal, customer.c_comment, orders.o_orderkey - Inner Join: customer.c_custkey = orders.o_custkey - TableScan: customer 
projection=[c_custkey, c_name, c_address, c_nationkey, c_phone, c_acctbal, c_comment] - Projection: orders.o_orderkey, orders.o_custkey - Filter: orders.o_orderdate >= Date32("1993-07-01") AND orders.o_orderdate < Date32("1993-10-01") - TableScan: orders projection=[o_orderkey, o_custkey, o_orderdate], partial_filters=[orders.o_orderdate >= Date32("1993-07-01"), orders.o_orderdate < Date32("1993-10-01")] - Projection: lineitem.l_orderkey, lineitem.l_extendedprice, lineitem.l_discount - Filter: lineitem.l_returnflag = Utf8("R") - TableScan: lineitem projection=[l_orderkey, l_extendedprice, l_discount, l_returnflag], partial_filters=[lineitem.l_returnflag = Utf8("R")] - TableScan: nation projection=[n_nationkey, n_name] - -DataFusion Physical Plan -======================== - -GlobalLimitExec: skip=0, fetch=20 - SortPreservingMergeExec: [revenue@2 DESC], fetch=20 - SortExec: TopK(fetch=20), expr=[revenue@2 DESC], preserve_partitioning=[true] - ProjectionExec: expr=[c_custkey@0 as c_custkey, c_name@1 as c_name, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@7 as revenue, c_acctbal@2 as c_acctbal, n_name@4 as n_name, c_address@5 as c_address, c_phone@3 as c_phone, c_comment@6 as c_comment] - AggregateExec: mode=FinalPartitioned, gby=[c_custkey@0 as c_custkey, c_name@1 as c_name, c_acctbal@2 as c_acctbal, c_phone@3 as c_phone, n_name@4 as n_name, c_address@5 as c_address, c_comment@6 as c_comment], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([c_custkey@0, c_name@1, c_acctbal@2, c_phone@3, n_name@4, c_address@5, c_comment@6], 4), input_partitions=4 - AggregateExec: mode=Partial, gby=[c_custkey@0 as c_custkey, c_name@1 as c_name, c_acctbal@4 as c_acctbal, c_phone@3 as c_phone, n_name@8 as n_name, c_address@2 as c_address, c_comment@5 as c_comment], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] - ProjectionExec: 
expr=[c_custkey@1 as c_custkey, c_name@2 as c_name, c_address@3 as c_address, c_phone@4 as c_phone, c_acctbal@5 as c_acctbal, c_comment@6 as c_comment, l_extendedprice@7 as l_extendedprice, l_discount@8 as l_discount, n_name@0 as n_name] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, c_nationkey@3)], projection=[n_name@1, c_custkey@2, c_name@3, c_address@4, c_phone@6, c_acctbal@7, c_comment@8, l_extendedprice@9, l_discount@10] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=2 - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/IftthSxZMytggJzA_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([c_nationkey@3], 4), input_partitions=4 - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@7, l_orderkey@0)], projection=[c_custkey@0, c_name@1, c_address@2, c_nationkey@3, c_phone@4, c_acctbal@5, c_comment@6, l_extendedprice@9, l_discount@10] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([o_orderkey@7], 4), input_partitions=4 - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, c_name@1, c_address@2, c_nationkey@3, c_phone@4, c_acctbal@5, c_comment@6, o_orderkey@7] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([c_custkey@0], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_0.parquet:0..3472684, 
home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_1.parquet:0..3399474], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_1.parquet:3399474..3842490, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_2.parquet:0..3498085, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_3.parquet:0..2931057], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_3.parquet:2931057..3110748, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..6692467], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6692467..6784093, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..6780531]]}, projection=[c_custkey, c_name, c_address, c_nationkey, c_phone, c_acctbal, c_comment] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([o_custkey@1], 4), input_partitions=4 - ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: o_orderdate@2 >= 1993-07-01 AND o_orderdate@2 < 1993-10-01 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_0.parquet:0..13941205, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:0..13831262], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:13831262..13895674, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_2.parquet:0..14344446, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:0..13363609], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:13363609..14369581, 
home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..26766495], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:26766495..27275418, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..27263542]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1993-07-01 AND o_orderdate@4 < 1993-10-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1993-07-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1993-10-01 END, required_guarantees=[] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4 - ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_returnflag@3 = R - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, 
home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_orderkey, l_extendedprice, l_discount, l_returnflag], predicate=l_returnflag@8 = R, pruning_predicate=CASE WHEN l_returnflag_null_count@2 = l_returnflag_row_count@3 THEN false ELSE l_returnflag_min@0 <= R AND R <= l_returnflag_max@1 END, required_guarantees=[l_returnflag in (R)] - -RaySQL Plan -=========== - -Query Stage #0 (2 -> 4): -ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/IftthSxZMytggJzA_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name] - -Query Stage #1 (4 -> 4): -ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_0.parquet:0..3472684, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_1.parquet:0..3399474], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_1.parquet:3399474..3842490, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_2.parquet:0..3498085, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_3.parquet:0..2931057], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_3.parquet:2931057..3110748, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..6692467], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6692467..6784093, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..6780531]]}, projection=[c_custkey, c_name, c_address, c_nationkey, 
c_phone, c_acctbal, c_comment] - -Query Stage #2 (4 -> 4): -ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "o_custkey", index: 1 }], 4)) - ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: o_orderdate@2 >= 1993-07-01 AND o_orderdate@2 < 1993-10-01 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_0.parquet:0..13941205, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:0..13831262], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:13831262..13895674, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_2.parquet:0..14344446, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:0..13363609], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:13363609..14369581, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..26766495], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:26766495..27275418, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..27263542]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1993-07-01 AND o_orderdate@4 < 1993-10-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1993-07-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1993-10-01 END, required_guarantees=[] - -Query Stage #3 (4 -> 4): -ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "o_orderkey", index: 7 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, 
o_custkey@1)], projection=[c_custkey@0, c_name@1, c_address@2, c_nationkey@3, c_phone@4, c_acctbal@5, c_comment@6, o_orderkey@7] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "o_custkey", index: 1 }], 4)) - -Query Stage #4 (4 -> 4): -ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) - ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_returnflag@3 = R - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_orderkey, l_extendedprice, l_discount, l_returnflag], predicate=l_returnflag@8 = R, pruning_predicate=CASE WHEN l_returnflag_null_count@2 = l_returnflag_row_count@3 THEN false ELSE l_returnflag_min@0 <= R AND R <= 
l_returnflag_max@1 END, required_guarantees=[l_returnflag in (R)] - -Query Stage #5 (4 -> 4): -ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "c_nationkey", index: 3 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@7, l_orderkey@0)], projection=[c_custkey@0, c_name@1, c_address@2, c_nationkey@3, c_phone@4, c_acctbal@5, c_comment@6, l_extendedprice@9, l_discount@10] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "o_orderkey", index: 7 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=4, input_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) - -Query Stage #6 (4 -> 4): -ShuffleWriterExec(stage_id=6, output_partitioning=Hash([Column { name: "c_custkey", index: 0 }, Column { name: "c_name", index: 1 }, Column { name: "c_acctbal", index: 2 }, Column { name: "c_phone", index: 3 }, Column { name: "n_name", index: 4 }, Column { name: "c_address", index: 5 }, Column { name: "c_comment", index: 6 }], 4)) - AggregateExec: mode=Partial, gby=[c_custkey@0 as c_custkey, c_name@1 as c_name, c_acctbal@4 as c_acctbal, c_phone@3 as c_phone, n_name@8 as n_name, c_address@2 as c_address, c_comment@5 as c_comment], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] - ProjectionExec: expr=[c_custkey@1 as c_custkey, c_name@2 as c_name, c_address@3 as c_address, c_phone@4 as c_phone, c_acctbal@5 as c_acctbal, c_comment@6 as c_comment, l_extendedprice@7 as l_extendedprice, l_discount@8 as l_discount, n_name@0 as n_name] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, c_nationkey@3)], projection=[n_name@1, c_custkey@2, c_name@3, c_address@4, c_phone@6, c_acctbal@7, c_comment@8, l_extendedprice@9, l_discount@10] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=0, 
input_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=5, input_partitioning=Hash([Column { name: "c_nationkey", index: 3 }], 4)) - -Query Stage #7 (4 -> 4): -ShuffleWriterExec(stage_id=7, output_partitioning=Hash([Column { name: "c_custkey", index: 0 }, Column { name: "c_name", index: 1 }, Column { name: "c_acctbal", index: 3 }, Column { name: "c_phone", index: 6 }, Column { name: "n_name", index: 4 }, Column { name: "c_address", index: 5 }, Column { name: "c_comment", index: 7 }], 4)) - SortExec: TopK(fetch=20), expr=[revenue@2 DESC], preserve_partitioning=[true] - ProjectionExec: expr=[c_custkey@0 as c_custkey, c_name@1 as c_name, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@7 as revenue, c_acctbal@2 as c_acctbal, n_name@4 as n_name, c_address@5 as c_address, c_phone@3 as c_phone, c_comment@6 as c_comment] - AggregateExec: mode=FinalPartitioned, gby=[c_custkey@0 as c_custkey, c_name@1 as c_name, c_acctbal@2 as c_acctbal, c_phone@3 as c_phone, n_name@4 as n_name, c_address@5 as c_address, c_comment@6 as c_comment], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=6, input_partitioning=Hash([Column { name: "c_custkey", index: 0 }, Column { name: "c_name", index: 1 }, Column { name: "c_acctbal", index: 2 }, Column { name: "c_phone", index: 3 }, Column { name: "n_name", index: 4 }, Column { name: "c_address", index: 5 }, Column { name: "c_comment", index: 6 }], 4)) - -Query Stage #8 (1 -> 1): -GlobalLimitExec: skip=0, fetch=20 - SortPreservingMergeExec: [revenue@2 DESC], fetch=20 - ShuffleReaderExec(stage_id=7, input_partitioning=Hash([Column { name: "c_custkey", index: 0 }, Column { name: "c_name", index: 1 }, Column { name: "c_acctbal", index: 3 }, Column { name: "c_phone", index: 6 }, Column { name: "n_name", index: 4 }, Column { name: "c_address", index: 5 }, 
Column { name: "c_comment", index: 7 }], 4)) - diff --git a/testdata/expected-plans/q11.txt b/testdata/expected-plans/q11.txt deleted file mode 100644 index 8b1556b..0000000 --- a/testdata/expected-plans/q11.txt +++ /dev/null @@ -1,175 +0,0 @@ -DataFusion Logical Plan -======================= - -Sort: value DESC NULLS FIRST - Projection: partsupp.ps_partkey, sum(partsupp.ps_supplycost * partsupp.ps_availqty) AS value - Inner Join: Filter: CAST(sum(partsupp.ps_supplycost * partsupp.ps_availqty) AS Decimal128(38, 15)) > __scalar_sq_1.sum(partsupp.ps_supplycost * partsupp.ps_availqty) * Float64(0.0001) - Aggregate: groupBy=[[partsupp.ps_partkey]], aggr=[[sum(partsupp.ps_supplycost * CAST(partsupp.ps_availqty AS Decimal128(10, 0)))]] - Projection: partsupp.ps_partkey, partsupp.ps_availqty, partsupp.ps_supplycost - Inner Join: supplier.s_nationkey = nation.n_nationkey - Projection: partsupp.ps_partkey, partsupp.ps_availqty, partsupp.ps_supplycost, supplier.s_nationkey - Inner Join: partsupp.ps_suppkey = supplier.s_suppkey - TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_availqty, ps_supplycost] - TableScan: supplier projection=[s_suppkey, s_nationkey] - Projection: nation.n_nationkey - Filter: nation.n_name = Utf8("ALGERIA") - TableScan: nation projection=[n_nationkey, n_name], partial_filters=[nation.n_name = Utf8("ALGERIA")] - SubqueryAlias: __scalar_sq_1 - Projection: CAST(CAST(sum(partsupp.ps_supplycost * partsupp.ps_availqty) AS Float64) * Float64(0.0001) AS Decimal128(38, 15)) - Aggregate: groupBy=[[]], aggr=[[sum(partsupp.ps_supplycost * CAST(partsupp.ps_availqty AS Decimal128(10, 0)))]] - Projection: partsupp.ps_availqty, partsupp.ps_supplycost - Inner Join: supplier.s_nationkey = nation.n_nationkey - Projection: partsupp.ps_availqty, partsupp.ps_supplycost, supplier.s_nationkey - Inner Join: partsupp.ps_suppkey = supplier.s_suppkey - TableScan: partsupp projection=[ps_suppkey, ps_availqty, ps_supplycost] - TableScan: supplier projection=[s_suppkey, 
s_nationkey] - Projection: nation.n_nationkey - Filter: nation.n_name = Utf8("ALGERIA") - TableScan: nation projection=[n_nationkey, n_name], partial_filters=[nation.n_name = Utf8("ALGERIA")] - -DataFusion Physical Plan -======================== - -SortPreservingMergeExec: [value@1 DESC] - SortExec: expr=[value@1 DESC], preserve_partitioning=[true] - ProjectionExec: expr=[ps_partkey@1 as ps_partkey, sum(partsupp.ps_supplycost * partsupp.ps_availqty)@2 as value] - NestedLoopJoinExec: join_type=Inner, filter=CAST(sum(partsupp.ps_supplycost * partsupp.ps_availqty)@0 AS Decimal128(38, 15)) > sum(partsupp.ps_supplycost * partsupp.ps_availqty) * Float64(0.0001)@1 - ProjectionExec: expr=[CAST(CAST(sum(partsupp.ps_supplycost * partsupp.ps_availqty)@0 AS Float64) * 0.0001 AS Decimal128(38, 15)) as sum(partsupp.ps_supplycost * partsupp.ps_availqty) * Float64(0.0001)] - AggregateExec: mode=Final, gby=[], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] - CoalescePartitionsExec - AggregateExec: mode=Partial, gby=[], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[ps_availqty@1, ps_supplycost@2] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=4 - RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 - ProjectionExec: expr=[n_nationkey@0 as n_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: n_name@1 = ALGERIA - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/IftthSxZMytggJzA_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ALGERIA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ALGERIA AND ALGERIA 
<= n_name_max@1 END, required_guarantees=[n_name in (ALGERIA)] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([s_nationkey@2], 4), input_partitions=4 - ProjectionExec: expr=[ps_availqty@1 as ps_availqty, ps_supplycost@2 as ps_supplycost, s_nationkey@0 as s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@0)], projection=[s_nationkey@1, ps_availqty@3, ps_supplycost@4] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([ps_suppkey@0], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_0.parquet:0..11328263, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_1.parquet:0..9740046], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_1.parquet:9740046..10585781, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_2.parquet:0..9946761, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_3.parquet:0..10275813], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_3.parquet:10275813..10620507, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..20723615], 
[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:20723615..20895812, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..20896109]]}, projection=[ps_suppkey, ps_availqty, ps_supplycost] - AggregateExec: mode=FinalPartitioned, gby=[ps_partkey@0 as ps_partkey], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([ps_partkey@0], 4), input_partitions=4 - AggregateExec: mode=Partial, gby=[ps_partkey@0 as ps_partkey], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@3)], projection=[ps_partkey@1, ps_availqty@2, ps_supplycost@3] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=4 - RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 - ProjectionExec: expr=[n_nationkey@0 as n_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: n_name@1 = ALGERIA - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/IftthSxZMytggJzA_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ALGERIA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ALGERIA AND ALGERIA <= n_name_max@1 END, required_guarantees=[n_name in (ALGERIA)] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([s_nationkey@3], 4), input_partitions=4 - ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_availqty@2 as ps_availqty, ps_supplycost@3 as ps_supplycost, s_nationkey@0 as s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, 
on=[(s_suppkey@0, ps_suppkey@1)], projection=[s_nationkey@1, ps_partkey@2, ps_availqty@4, ps_supplycost@5] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([ps_suppkey@1], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_0.parquet:0..11328263, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_1.parquet:0..9740046], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_1.parquet:9740046..10585781, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_2.parquet:0..9946761, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_3.parquet:0..10275813], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_3.parquet:10275813..10620507, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..20723615], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:20723615..20895812, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..20896109]]}, projection=[ps_partkey, ps_suppkey, ps_availqty, ps_supplycost] - -RaySQL Plan -=========== - -Query Stage #0 (2 -> 4): -ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: 
"n_nationkey", index: 0 }], 4)) - ProjectionExec: expr=[n_nationkey@0 as n_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: n_name@1 = ALGERIA - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/IftthSxZMytggJzA_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ALGERIA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ALGERIA AND ALGERIA <= n_name_max@1 END, required_guarantees=[n_name in (ALGERIA)] - -Query Stage #1 (4 -> 4): -ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] - -Query Stage #2 (4 -> 4): -ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "ps_suppkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_0.parquet:0..11328263, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_1.parquet:0..9740046], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_1.parquet:9740046..10585781, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_2.parquet:0..9946761, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_3.parquet:0..10275813], 
[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_3.parquet:10275813..10620507, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..20723615], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:20723615..20895812, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..20896109]]}, projection=[ps_suppkey, ps_availqty, ps_supplycost] - -Query Stage #3 (4 -> 4): -ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "s_nationkey", index: 2 }], 4)) - ProjectionExec: expr=[ps_availqty@1 as ps_availqty, ps_supplycost@2 as ps_supplycost, s_nationkey@0 as s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@0)], projection=[s_nationkey@1, ps_availqty@3, ps_supplycost@4] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "ps_suppkey", index: 0 }], 4)) - -Query Stage #4 (4 -> 1): -ShuffleWriterExec(stage_id=4, output_partitioning=Hash([], 4)) - AggregateExec: mode=Partial, gby=[], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[ps_availqty@1, ps_supplycost@2] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "s_nationkey", index: 2 }], 4)) - -Query Stage #5 (2 -> 4): -ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 
}], 4)) - ProjectionExec: expr=[n_nationkey@0 as n_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: n_name@1 = ALGERIA - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/IftthSxZMytggJzA_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ALGERIA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ALGERIA AND ALGERIA <= n_name_max@1 END, required_guarantees=[n_name in (ALGERIA)] - -Query Stage #6 (4 -> 4): -ShuffleWriterExec(stage_id=6, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] - -Query Stage #7 (4 -> 4): -ShuffleWriterExec(stage_id=7, output_partitioning=Hash([Column { name: "ps_suppkey", index: 1 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_0.parquet:0..11328263, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_1.parquet:0..9740046], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_1.parquet:9740046..10585781, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_2.parquet:0..9946761, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_3.parquet:0..10275813], 
[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_3.parquet:10275813..10620507, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..20723615], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:20723615..20895812, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..20896109]]}, projection=[ps_partkey, ps_suppkey, ps_availqty, ps_supplycost] - -Query Stage #8 (4 -> 4): -ShuffleWriterExec(stage_id=8, output_partitioning=Hash([Column { name: "s_nationkey", index: 3 }], 4)) - ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_availqty@2 as ps_availqty, ps_supplycost@3 as ps_supplycost, s_nationkey@0 as s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@1)], projection=[s_nationkey@1, ps_partkey@2, ps_availqty@4, ps_supplycost@5] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=6, input_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=7, input_partitioning=Hash([Column { name: "ps_suppkey", index: 1 }], 4)) - -Query Stage #9 (4 -> 4): -ShuffleWriterExec(stage_id=9, output_partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 4)) - AggregateExec: mode=Partial, gby=[ps_partkey@0 as ps_partkey], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@3)], projection=[ps_partkey@1, ps_availqty@2, ps_supplycost@3] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=5, input_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=8, input_partitioning=Hash([Column { name: "s_nationkey", index: 3 
}], 4)) - -Query Stage #10 (4 -> 4): -ShuffleWriterExec(stage_id=10, output_partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 4)) - SortExec: expr=[value@1 DESC], preserve_partitioning=[true] - ProjectionExec: expr=[ps_partkey@1 as ps_partkey, sum(partsupp.ps_supplycost * partsupp.ps_availqty)@2 as value] - NestedLoopJoinExec: join_type=Inner, filter=CAST(sum(partsupp.ps_supplycost * partsupp.ps_availqty)@0 AS Decimal128(38, 15)) > sum(partsupp.ps_supplycost * partsupp.ps_availqty) * Float64(0.0001)@1 - ProjectionExec: expr=[CAST(CAST(sum(partsupp.ps_supplycost * partsupp.ps_availqty)@0 AS Float64) * 0.0001 AS Decimal128(38, 15)) as sum(partsupp.ps_supplycost * partsupp.ps_availqty) * Float64(0.0001)] - AggregateExec: mode=Final, gby=[], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] - CoalescePartitionsExec - ShuffleReaderExec(stage_id=4, input_partitioning=Hash([], 4)) - AggregateExec: mode=FinalPartitioned, gby=[ps_partkey@0 as ps_partkey], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=9, input_partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 4)) - -Query Stage #11 (4 -> 1): -SortPreservingMergeExec: [value@1 DESC] - ShuffleReaderExec(stage_id=10, input_partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 4)) - diff --git a/testdata/expected-plans/q12.txt b/testdata/expected-plans/q12.txt deleted file mode 100644 index ba752e4..0000000 --- a/testdata/expected-plans/q12.txt +++ /dev/null @@ -1,73 +0,0 @@ -DataFusion Logical Plan -======================= - -Sort: lineitem.l_shipmode ASC NULLS LAST - Projection: lineitem.l_shipmode, sum(CASE WHEN orders.o_orderpriority = Utf8("1-URGENT") OR orders.o_orderpriority = Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END) AS high_line_count, sum(CASE WHEN orders.o_orderpriority != Utf8("1-URGENT") AND orders.o_orderpriority != Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END) AS low_line_count - 
Aggregate: groupBy=[[lineitem.l_shipmode]], aggr=[[sum(CASE WHEN orders.o_orderpriority = Utf8("1-URGENT") OR orders.o_orderpriority = Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END), sum(CASE WHEN orders.o_orderpriority != Utf8("1-URGENT") AND orders.o_orderpriority != Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END)]] - Projection: orders.o_orderpriority, lineitem.l_shipmode - Inner Join: orders.o_orderkey = lineitem.l_orderkey - TableScan: orders projection=[o_orderkey, o_orderpriority] - Projection: lineitem.l_orderkey, lineitem.l_shipmode - Filter: (lineitem.l_shipmode = Utf8("FOB") OR lineitem.l_shipmode = Utf8("SHIP")) AND lineitem.l_receiptdate > lineitem.l_commitdate AND lineitem.l_shipdate < lineitem.l_commitdate AND lineitem.l_receiptdate >= Date32("1995-01-01") AND lineitem.l_receiptdate < Date32("1996-01-01") - TableScan: lineitem projection=[l_orderkey, l_shipdate, l_commitdate, l_receiptdate, l_shipmode], partial_filters=[lineitem.l_shipmode = Utf8("FOB") OR lineitem.l_shipmode = Utf8("SHIP"), lineitem.l_receiptdate > lineitem.l_commitdate, lineitem.l_shipdate < lineitem.l_commitdate, lineitem.l_receiptdate >= Date32("1995-01-01"), lineitem.l_receiptdate < Date32("1996-01-01")] - -DataFusion Physical Plan -======================== - -SortPreservingMergeExec: [l_shipmode@0 ASC NULLS LAST] - SortExec: expr=[l_shipmode@0 ASC NULLS LAST], preserve_partitioning=[true] - ProjectionExec: expr=[l_shipmode@0 as l_shipmode, sum(CASE WHEN orders.o_orderpriority = Utf8("1-URGENT") OR orders.o_orderpriority = Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END)@1 as high_line_count, sum(CASE WHEN orders.o_orderpriority != Utf8("1-URGENT") AND orders.o_orderpriority != Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END)@2 as low_line_count] - AggregateExec: mode=FinalPartitioned, gby=[l_shipmode@0 as l_shipmode], aggr=[sum(CASE WHEN orders.o_orderpriority = Utf8("1-URGENT") OR orders.o_orderpriority = Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END), sum(CASE WHEN 
orders.o_orderpriority != Utf8("1-URGENT") AND orders.o_orderpriority != Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END)] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([l_shipmode@0], 4), input_partitions=4 - AggregateExec: mode=Partial, gby=[l_shipmode@1 as l_shipmode], aggr=[sum(CASE WHEN orders.o_orderpriority = Utf8("1-URGENT") OR orders.o_orderpriority = Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END), sum(CASE WHEN orders.o_orderpriority != Utf8("1-URGENT") AND orders.o_orderpriority != Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END)] - ProjectionExec: expr=[o_orderpriority@1 as o_orderpriority, l_shipmode@0 as l_shipmode] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(l_orderkey@0, o_orderkey@0)], projection=[l_shipmode@1, o_orderpriority@3] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4 - ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_shipmode@4 as l_shipmode] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: (l_shipmode@4 = FOB OR l_shipmode@4 = SHIP) AND l_receiptdate@3 > l_commitdate@2 AND l_shipdate@1 < l_commitdate@2 AND l_receiptdate@3 >= 1995-01-01 AND l_receiptdate@3 < 1996-01-01 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], 
[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_orderkey, l_shipdate, l_commitdate, l_receiptdate, l_shipmode], predicate=(l_shipmode@14 = FOB OR l_shipmode@14 = SHIP) AND l_receiptdate@12 > l_commitdate@11 AND l_shipdate@10 < l_commitdate@11 AND l_receiptdate@12 >= 1995-01-01 AND l_receiptdate@12 < 1996-01-01, pruning_predicate=(CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= FOB AND FOB <= l_shipmode_max@1 END OR CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= SHIP AND SHIP <= l_shipmode_max@1 END) AND CASE WHEN l_receiptdate_null_count@5 = l_receiptdate_row_count@6 THEN false ELSE l_receiptdate_max@4 >= 1995-01-01 END AND CASE WHEN l_receiptdate_null_count@5 = l_receiptdate_row_count@6 THEN false ELSE l_receiptdate_min@7 < 1996-01-01 END, required_guarantees=[l_shipmode in (FOB, SHIP)] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([o_orderkey@0], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_0.parquet:0..13941205, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:0..13831262], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:13831262..13895674, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_2.parquet:0..14344446, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:0..13363609], 
[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:13363609..14369581, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..26766495], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:26766495..27275418, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..27263542]]}, projection=[o_orderkey, o_orderpriority] - -RaySQL Plan -=========== - -Query Stage #0 (4 -> 4): -ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) - ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_shipmode@4 as l_shipmode] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: (l_shipmode@4 = FOB OR l_shipmode@4 = SHIP) AND l_receiptdate@3 > l_commitdate@2 AND l_shipdate@1 < l_commitdate@2 AND l_receiptdate@3 >= 1995-01-01 AND l_receiptdate@3 < 1996-01-01 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_orderkey, l_shipdate, l_commitdate, 
l_receiptdate, l_shipmode], predicate=(l_shipmode@14 = FOB OR l_shipmode@14 = SHIP) AND l_receiptdate@12 > l_commitdate@11 AND l_shipdate@10 < l_commitdate@11 AND l_receiptdate@12 >= 1995-01-01 AND l_receiptdate@12 < 1996-01-01, pruning_predicate=(CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= FOB AND FOB <= l_shipmode_max@1 END OR CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= SHIP AND SHIP <= l_shipmode_max@1 END) AND CASE WHEN l_receiptdate_null_count@5 = l_receiptdate_row_count@6 THEN false ELSE l_receiptdate_max@4 >= 1995-01-01 END AND CASE WHEN l_receiptdate_null_count@5 = l_receiptdate_row_count@6 THEN false ELSE l_receiptdate_min@7 < 1996-01-01 END, required_guarantees=[l_shipmode in (FOB, SHIP)] - -Query Stage #1 (4 -> 4): -ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_0.parquet:0..13941205, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:0..13831262], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:13831262..13895674, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_2.parquet:0..14344446, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:0..13363609], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:13363609..14369581, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..26766495], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:26766495..27275418, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..27263542]]}, projection=[o_orderkey, o_orderpriority] - -Query Stage #2 (4 -> 
4): -ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "l_shipmode", index: 0 }], 4)) - AggregateExec: mode=Partial, gby=[l_shipmode@1 as l_shipmode], aggr=[sum(CASE WHEN orders.o_orderpriority = Utf8("1-URGENT") OR orders.o_orderpriority = Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END), sum(CASE WHEN orders.o_orderpriority != Utf8("1-URGENT") AND orders.o_orderpriority != Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END)] - ProjectionExec: expr=[o_orderpriority@1 as o_orderpriority, l_shipmode@0 as l_shipmode] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(l_orderkey@0, o_orderkey@0)], projection=[l_shipmode@1, o_orderpriority@3] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) - -Query Stage #3 (4 -> 4): -ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "l_shipmode", index: 0 }], 4)) - SortExec: expr=[l_shipmode@0 ASC NULLS LAST], preserve_partitioning=[true] - ProjectionExec: expr=[l_shipmode@0 as l_shipmode, sum(CASE WHEN orders.o_orderpriority = Utf8("1-URGENT") OR orders.o_orderpriority = Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END)@1 as high_line_count, sum(CASE WHEN orders.o_orderpriority != Utf8("1-URGENT") AND orders.o_orderpriority != Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END)@2 as low_line_count] - AggregateExec: mode=FinalPartitioned, gby=[l_shipmode@0 as l_shipmode], aggr=[sum(CASE WHEN orders.o_orderpriority = Utf8("1-URGENT") OR orders.o_orderpriority = Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END), sum(CASE WHEN orders.o_orderpriority != Utf8("1-URGENT") AND orders.o_orderpriority != Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END)] - CoalesceBatchesExec: target_batch_size=8192 - 
ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "l_shipmode", index: 0 }], 4)) - -Query Stage #4 (4 -> 1): -SortPreservingMergeExec: [l_shipmode@0 ASC NULLS LAST] - ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "l_shipmode", index: 0 }], 4)) - diff --git a/testdata/expected-plans/q13.txt b/testdata/expected-plans/q13.txt deleted file mode 100644 index 08e307a..0000000 --- a/testdata/expected-plans/q13.txt +++ /dev/null @@ -1,78 +0,0 @@ -DataFusion Logical Plan -======================= - -Sort: custdist DESC NULLS FIRST, c_orders.c_count DESC NULLS FIRST - Projection: c_orders.c_count, count(*) AS custdist - Aggregate: groupBy=[[c_orders.c_count]], aggr=[[count(Int64(1)) AS count(*)]] - SubqueryAlias: c_orders - Projection: count(orders.o_orderkey) AS c_count - Aggregate: groupBy=[[customer.c_custkey]], aggr=[[count(orders.o_orderkey)]] - Projection: customer.c_custkey, orders.o_orderkey - Left Join: customer.c_custkey = orders.o_custkey - TableScan: customer projection=[c_custkey] - Projection: orders.o_orderkey, orders.o_custkey - Filter: orders.o_comment NOT LIKE Utf8("%express%requests%") - TableScan: orders projection=[o_orderkey, o_custkey, o_comment], partial_filters=[orders.o_comment NOT LIKE Utf8("%express%requests%")] - -DataFusion Physical Plan -======================== - -SortPreservingMergeExec: [custdist@1 DESC,c_count@0 DESC] - SortExec: expr=[custdist@1 DESC,c_count@0 DESC], preserve_partitioning=[true] - ProjectionExec: expr=[c_count@0 as c_count, count(*)@1 as custdist] - AggregateExec: mode=FinalPartitioned, gby=[c_count@0 as c_count], aggr=[count(*)] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([c_count@0], 4), input_partitions=4 - AggregateExec: mode=Partial, gby=[c_count@0 as c_count], aggr=[count(*)] - ProjectionExec: expr=[count(orders.o_orderkey)@1 as c_count] - AggregateExec: mode=SinglePartitioned, gby=[c_custkey@0 as c_custkey], 
aggr=[count(orders.o_orderkey)] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Left, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, o_orderkey@1] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([c_custkey@0], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_0.parquet:0..3472684, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_1.parquet:0..3399474], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_1.parquet:3399474..3842490, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_2.parquet:0..3498085, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_3.parquet:0..2931057], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_3.parquet:2931057..3110748, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..6692467], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6692467..6784093, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..6780531]]}, projection=[c_custkey] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([o_custkey@1], 4), input_partitions=4 - ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: o_comment@2 NOT LIKE %express%requests% - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_0.parquet:0..13941205, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:0..13831262], 
[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:13831262..13895674, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_2.parquet:0..14344446, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:0..13363609], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:13363609..14369581, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..26766495], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:26766495..27275418, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..27263542]]}, projection=[o_orderkey, o_custkey, o_comment], predicate=o_comment@8 NOT LIKE %express%requests% - -RaySQL Plan -=========== - -Query Stage #0 (4 -> 4): -ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_0.parquet:0..3472684, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_1.parquet:0..3399474], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_1.parquet:3399474..3842490, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_2.parquet:0..3498085, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_3.parquet:0..2931057], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_3.parquet:2931057..3110748, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..6692467], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6692467..6784093, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..6780531]]}, 
projection=[c_custkey] - -Query Stage #1 (4 -> 4): -ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "o_custkey", index: 1 }], 4)) - ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: o_comment@2 NOT LIKE %express%requests% - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_0.parquet:0..13941205, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:0..13831262], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:13831262..13895674, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_2.parquet:0..14344446, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:0..13363609], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:13363609..14369581, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..26766495], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:26766495..27275418, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..27263542]]}, projection=[o_orderkey, o_custkey, o_comment], predicate=o_comment@8 NOT LIKE %express%requests% - -Query Stage #2 (4 -> 4): -ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "c_count", index: 0 }], 4)) - AggregateExec: mode=Partial, gby=[c_count@0 as c_count], aggr=[count(*)] - ProjectionExec: expr=[count(orders.o_orderkey)@1 as c_count] - AggregateExec: mode=SinglePartitioned, gby=[c_custkey@0 as c_custkey], aggr=[count(orders.o_orderkey)] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Left, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, o_orderkey@1] - CoalesceBatchesExec: 
target_batch_size=8192 - ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "o_custkey", index: 1 }], 4)) - -Query Stage #3 (4 -> 4): -ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "c_count", index: 0 }], 4)) - SortExec: expr=[custdist@1 DESC,c_count@0 DESC], preserve_partitioning=[true] - ProjectionExec: expr=[c_count@0 as c_count, count(*)@1 as custdist] - AggregateExec: mode=FinalPartitioned, gby=[c_count@0 as c_count], aggr=[count(*)] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "c_count", index: 0 }], 4)) - -Query Stage #4 (4 -> 1): -SortPreservingMergeExec: [custdist@1 DESC,c_count@0 DESC] - ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "c_count", index: 0 }], 4)) - diff --git a/testdata/expected-plans/q14.txt b/testdata/expected-plans/q14.txt deleted file mode 100644 index e8eb357..0000000 --- a/testdata/expected-plans/q14.txt +++ /dev/null @@ -1,63 +0,0 @@ -DataFusion Logical Plan -======================= - -Projection: Float64(100) * CAST(sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END) AS Float64) / CAST(sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS Float64) AS promo_revenue - Aggregate: groupBy=[[]], aggr=[[sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN __common_expr_1 ELSE Decimal128(Some(0),35,4) END) AS sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END), sum(__common_expr_1) AS sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]] - Projection: lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - lineitem.l_discount) AS __common_expr_1, part.p_type - Inner Join: 
lineitem.l_partkey = part.p_partkey - Projection: lineitem.l_partkey, lineitem.l_extendedprice, lineitem.l_discount - Filter: lineitem.l_shipdate >= Date32("1995-02-01") AND lineitem.l_shipdate < Date32("1995-03-01") - TableScan: lineitem projection=[l_partkey, l_extendedprice, l_discount, l_shipdate], partial_filters=[lineitem.l_shipdate >= Date32("1995-02-01"), lineitem.l_shipdate < Date32("1995-03-01")] - TableScan: part projection=[p_partkey, p_type] - -DataFusion Physical Plan -======================== - -ProjectionExec: expr=[100 * CAST(sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END)@0 AS Float64) / CAST(sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@1 AS Float64) as promo_revenue] - AggregateExec: mode=Final, gby=[], aggr=[sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] - CoalescePartitionsExec - AggregateExec: mode=Partial, gby=[], aggr=[sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] - ProjectionExec: expr=[l_extendedprice@1 * (Some(1),20,0 - l_discount@2) as __common_expr_2, p_type@0 as p_type] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, l_partkey@0)], projection=[p_type@1, l_extendedprice@3, l_discount@4] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([p_partkey@0], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet:0..3454243, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet:0..61547], 
[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet:61547..3452413, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_0.parquet:0..124924], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_0.parquet:124924..2056321, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_1.parquet:0..1526046, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_2.parquet:0..58347], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_2.parquet:58347..1788444, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_3.parquet:0..1785690]]}, projection=[p_partkey, p_type] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([l_partkey@0], 4), input_partitions=4 - ProjectionExec: expr=[l_partkey@0 as l_partkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_shipdate@3 >= 1995-02-01 AND l_shipdate@3 < 1995-03-01 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], 
[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_partkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1995-02-01 AND l_shipdate@10 < 1995-03-01, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1995-02-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1995-03-01 END, required_guarantees=[] - -RaySQL Plan -=========== - -Query Stage #0 (4 -> 4): -ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet:0..3454243, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet:0..61547], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet:61547..3452413, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_0.parquet:0..124924], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_0.parquet:124924..2056321, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_1.parquet:0..1526046, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_2.parquet:0..58347], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_2.parquet:58347..1788444, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_3.parquet:0..1785690]]}, projection=[p_partkey, p_type] - -Query Stage #1 (4 -> 4): -ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "l_partkey", index: 0 }], 4)) - ProjectionExec: expr=[l_partkey@0 as l_partkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] - 
CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_shipdate@3 >= 1995-02-01 AND l_shipdate@3 < 1995-03-01 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_partkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1995-02-01 AND l_shipdate@10 < 1995-03-01, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1995-02-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1995-03-01 END, required_guarantees=[] - -Query Stage #2 (4 -> 1): -ShuffleWriterExec(stage_id=2, output_partitioning=Hash([], 4)) - AggregateExec: mode=Partial, gby=[], aggr=[sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] - ProjectionExec: expr=[l_extendedprice@1 * (Some(1),20,0 - l_discount@2) as __common_expr_2, p_type@0 as p_type] - CoalesceBatchesExec: 
target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, l_partkey@0)], projection=[p_type@1, l_extendedprice@3, l_discount@4] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "l_partkey", index: 0 }], 4)) - -Query Stage #3 (1 -> 1): -ProjectionExec: expr=[100 * CAST(sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END)@0 AS Float64) / CAST(sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@1 AS Float64) as promo_revenue] - AggregateExec: mode=Final, gby=[], aggr=[sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] - CoalescePartitionsExec - ShuffleReaderExec(stage_id=2, input_partitioning=Hash([], 4)) - diff --git a/testdata/expected-plans/q16.txt b/testdata/expected-plans/q16.txt deleted file mode 100644 index 0738c4b..0000000 --- a/testdata/expected-plans/q16.txt +++ /dev/null @@ -1,113 +0,0 @@ -DataFusion Logical Plan -======================= - -Sort: supplier_cnt DESC NULLS FIRST, part.p_brand ASC NULLS LAST, part.p_type ASC NULLS LAST, part.p_size ASC NULLS LAST - Projection: part.p_brand, part.p_type, part.p_size, count(alias1) AS supplier_cnt - Aggregate: groupBy=[[part.p_brand, part.p_type, part.p_size]], aggr=[[count(alias1)]] - Aggregate: groupBy=[[part.p_brand, part.p_type, part.p_size, partsupp.ps_suppkey AS alias1]], aggr=[[]] - LeftAnti Join: partsupp.ps_suppkey = __correlated_sq_1.s_suppkey - Projection: partsupp.ps_suppkey, part.p_brand, part.p_type, part.p_size - Inner Join: partsupp.ps_partkey = part.p_partkey - TableScan: partsupp projection=[ps_partkey, ps_suppkey] - Filter: 
part.p_brand != Utf8("Brand#14") AND part.p_type NOT LIKE Utf8("SMALL PLATED%") AND part.p_size IN ([Int32(14), Int32(6), Int32(5), Int32(31), Int32(49), Int32(15), Int32(41), Int32(47)]) - TableScan: part projection=[p_partkey, p_brand, p_type, p_size], partial_filters=[part.p_brand != Utf8("Brand#14"), part.p_type NOT LIKE Utf8("SMALL PLATED%"), part.p_size IN ([Int32(14), Int32(6), Int32(5), Int32(31), Int32(49), Int32(15), Int32(41), Int32(47)])] - SubqueryAlias: __correlated_sq_1 - Projection: supplier.s_suppkey - Filter: supplier.s_comment LIKE Utf8("%Customer%Complaints%") - TableScan: supplier projection=[s_suppkey, s_comment], partial_filters=[supplier.s_comment LIKE Utf8("%Customer%Complaints%")] - -DataFusion Physical Plan -======================== - -SortPreservingMergeExec: [supplier_cnt@3 DESC,p_brand@0 ASC NULLS LAST,p_type@1 ASC NULLS LAST,p_size@2 ASC NULLS LAST] - SortExec: expr=[supplier_cnt@3 DESC,p_brand@0 ASC NULLS LAST,p_type@1 ASC NULLS LAST,p_size@2 ASC NULLS LAST], preserve_partitioning=[true] - ProjectionExec: expr=[p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size, count(alias1)@3 as supplier_cnt] - AggregateExec: mode=FinalPartitioned, gby=[p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size], aggr=[count(alias1)] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([p_brand@0, p_type@1, p_size@2], 4), input_partitions=4 - AggregateExec: mode=Partial, gby=[p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size], aggr=[count(alias1)] - AggregateExec: mode=FinalPartitioned, gby=[p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size, alias1@3 as alias1], aggr=[] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([p_brand@0, p_type@1, p_size@2, alias1@3], 4), input_partitions=4 - AggregateExec: mode=Partial, gby=[p_brand@1 as p_brand, p_type@2 as p_type, p_size@3 as p_size, ps_suppkey@0 as alias1], aggr=[] - CoalesceBatchesExec: 
target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=RightAnti, on=[(s_suppkey@0, ps_suppkey@0)] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=4 - ProjectionExec: expr=[s_suppkey@0 as s_suppkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: s_comment@1 LIKE %Customer%Complaints% - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_comment], predicate=s_comment@6 LIKE %Customer%Complaints% - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([ps_suppkey@0], 4), input_partitions=4 - ProjectionExec: expr=[ps_suppkey@3 as ps_suppkey, p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, ps_partkey@0)], projection=[p_brand@1, p_type@2, p_size@3, ps_suppkey@5] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([p_partkey@0], 4), input_partitions=4 - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: p_brand@1 != Brand#14 AND p_type@2 NOT LIKE SMALL PLATED% AND Use p_size@3 IN (SET) ([Literal { value: Int32(14) }, Literal { value: Int32(6) }, Literal { value: Int32(5) }, Literal { value: Int32(31) }, Literal { value: Int32(49) }, Literal { value: Int32(15) }, Literal { value: Int32(41) }, Literal { value: Int32(47) }]) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet:0..3454243, 
home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet:0..61547], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet:61547..3452413, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_0.parquet:0..124924], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_0.parquet:124924..2056321, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_1.parquet:0..1526046, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_2.parquet:0..58347], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_2.parquet:58347..1788444, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_3.parquet:0..1785690]]}, projection=[p_partkey, p_brand, p_type, p_size], predicate=p_brand@3 != Brand#14 AND p_type@4 NOT LIKE SMALL PLATED% AND Use p_size@5 IN (SET) ([Literal { value: Int32(14) }, Literal { value: Int32(6) }, Literal { value: Int32(5) }, Literal { value: Int32(31) }, Literal { value: Int32(49) }, Literal { value: Int32(15) }, Literal { value: Int32(41) }, Literal { value: Int32(47) }]), pruning_predicate=CASE WHEN p_brand_null_count@2 = p_brand_row_count@3 THEN false ELSE p_brand_min@0 != Brand#14 OR Brand#14 != p_brand_max@1 END AND (CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 14 AND 14 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 6 AND 6 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 5 AND 5 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 31 AND 31 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 49 AND 49 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false 
ELSE p_size_min@4 <= 15 AND 15 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 41 AND 41 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 47 AND 47 <= p_size_max@5 END), required_guarantees=[p_brand not in (Brand#14), p_size in (6, 47, 5, 14, 15, 41, 49, 31)] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([ps_partkey@0], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_0.parquet:0..11328263, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_1.parquet:0..9740046], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_1.parquet:9740046..10585781, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_2.parquet:0..9946761, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_3.parquet:0..10275813], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_3.parquet:10275813..10620507, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..20723615], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:20723615..20895812, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..20896109]]}, projection=[ps_partkey, ps_suppkey] - -RaySQL Plan -=========== - -Query Stage #0 (4 -> 4): -ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) - ProjectionExec: expr=[s_suppkey@0 as s_suppkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: s_comment@1 LIKE %Customer%Complaints% - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_0.parquet], 
[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_comment], predicate=s_comment@6 LIKE %Customer%Complaints% - -Query Stage #1 (4 -> 4): -ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: p_brand@1 != Brand#14 AND p_type@2 NOT LIKE SMALL PLATED% AND Use p_size@3 IN (SET) ([Literal { value: Int32(14) }, Literal { value: Int32(6) }, Literal { value: Int32(5) }, Literal { value: Int32(31) }, Literal { value: Int32(49) }, Literal { value: Int32(15) }, Literal { value: Int32(41) }, Literal { value: Int32(47) }]) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet:0..3454243, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet:0..61547], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet:61547..3452413, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_0.parquet:0..124924], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_0.parquet:124924..2056321, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_1.parquet:0..1526046, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_2.parquet:0..58347], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_2.parquet:58347..1788444, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_3.parquet:0..1785690]]}, projection=[p_partkey, p_brand, p_type, p_size], predicate=p_brand@3 != Brand#14 AND p_type@4 NOT LIKE SMALL PLATED% AND Use p_size@5 IN (SET) ([Literal { value: Int32(14) 
}, Literal { value: Int32(6) }, Literal { value: Int32(5) }, Literal { value: Int32(31) }, Literal { value: Int32(49) }, Literal { value: Int32(15) }, Literal { value: Int32(41) }, Literal { value: Int32(47) }]), pruning_predicate=CASE WHEN p_brand_null_count@2 = p_brand_row_count@3 THEN false ELSE p_brand_min@0 != Brand#14 OR Brand#14 != p_brand_max@1 END AND (CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 14 AND 14 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 6 AND 6 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 5 AND 5 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 31 AND 31 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 49 AND 49 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 15 AND 15 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 41 AND 41 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 47 AND 47 <= p_size_max@5 END), required_guarantees=[p_brand not in (Brand#14), p_size in (6, 47, 5, 14, 15, 41, 49, 31)] - -Query Stage #2 (4 -> 4): -ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_0.parquet:0..11328263, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_1.parquet:0..9740046], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_1.parquet:9740046..10585781, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_2.parquet:0..9946761, 
home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_3.parquet:0..10275813], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_3.parquet:10275813..10620507, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..20723615], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:20723615..20895812, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..20896109]]}, projection=[ps_partkey, ps_suppkey] - -Query Stage #3 (4 -> 4): -ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "ps_suppkey", index: 0 }], 4)) - ProjectionExec: expr=[ps_suppkey@3 as ps_suppkey, p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, ps_partkey@0)], projection=[p_brand@1, p_type@2, p_size@3, ps_suppkey@5] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 4)) - -Query Stage #4 (4 -> 4): -ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "p_brand", index: 0 }, Column { name: "p_type", index: 1 }, Column { name: "p_size", index: 2 }, Column { name: "alias1", index: 3 }], 4)) - AggregateExec: mode=Partial, gby=[p_brand@1 as p_brand, p_type@2 as p_type, p_size@3 as p_size, ps_suppkey@0 as alias1], aggr=[] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=RightAnti, on=[(s_suppkey@0, ps_suppkey@0)] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - 
ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "ps_suppkey", index: 0 }], 4)) - -Query Stage #5 (4 -> 4): -ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "p_brand", index: 0 }, Column { name: "p_type", index: 1 }, Column { name: "p_size", index: 2 }], 4)) - AggregateExec: mode=Partial, gby=[p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size], aggr=[count(alias1)] - AggregateExec: mode=FinalPartitioned, gby=[p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size, alias1@3 as alias1], aggr=[] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=4, input_partitioning=Hash([Column { name: "p_brand", index: 0 }, Column { name: "p_type", index: 1 }, Column { name: "p_size", index: 2 }, Column { name: "alias1", index: 3 }], 4)) - -Query Stage #6 (4 -> 4): -ShuffleWriterExec(stage_id=6, output_partitioning=Hash([Column { name: "p_brand", index: 0 }, Column { name: "p_type", index: 1 }, Column { name: "p_size", index: 2 }], 4)) - SortExec: expr=[supplier_cnt@3 DESC,p_brand@0 ASC NULLS LAST,p_type@1 ASC NULLS LAST,p_size@2 ASC NULLS LAST], preserve_partitioning=[true] - ProjectionExec: expr=[p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size, count(alias1)@3 as supplier_cnt] - AggregateExec: mode=FinalPartitioned, gby=[p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size], aggr=[count(alias1)] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=5, input_partitioning=Hash([Column { name: "p_brand", index: 0 }, Column { name: "p_type", index: 1 }, Column { name: "p_size", index: 2 }], 4)) - -Query Stage #7 (4 -> 1): -SortPreservingMergeExec: [supplier_cnt@3 DESC,p_brand@0 ASC NULLS LAST,p_type@1 ASC NULLS LAST,p_size@2 ASC NULLS LAST] - ShuffleReaderExec(stage_id=6, input_partitioning=Hash([Column { name: "p_brand", index: 0 }, Column { name: "p_type", index: 1 }, Column { name: "p_size", index: 2 }], 4)) - diff --git a/testdata/expected-plans/q17.txt 
b/testdata/expected-plans/q17.txt deleted file mode 100644 index e23b9ba..0000000 --- a/testdata/expected-plans/q17.txt +++ /dev/null @@ -1,88 +0,0 @@ -DataFusion Logical Plan -======================= - -Projection: CAST(sum(lineitem.l_extendedprice) AS Float64) / Float64(7) AS avg_yearly - Aggregate: groupBy=[[]], aggr=[[sum(lineitem.l_extendedprice)]] - Projection: lineitem.l_extendedprice - Inner Join: part.p_partkey = __scalar_sq_1.l_partkey Filter: CAST(lineitem.l_quantity AS Decimal128(30, 15)) < __scalar_sq_1.Float64(0.2) * avg(lineitem.l_quantity) - Projection: lineitem.l_quantity, lineitem.l_extendedprice, part.p_partkey - Inner Join: lineitem.l_partkey = part.p_partkey - TableScan: lineitem projection=[l_partkey, l_quantity, l_extendedprice] - Projection: part.p_partkey - Filter: part.p_brand = Utf8("Brand#42") AND part.p_container = Utf8("LG BAG") - TableScan: part projection=[p_partkey, p_brand, p_container], partial_filters=[part.p_brand = Utf8("Brand#42"), part.p_container = Utf8("LG BAG")] - SubqueryAlias: __scalar_sq_1 - Projection: CAST(Float64(0.2) * CAST(avg(lineitem.l_quantity) AS Float64) AS Decimal128(30, 15)), lineitem.l_partkey - Aggregate: groupBy=[[lineitem.l_partkey]], aggr=[[avg(lineitem.l_quantity)]] - TableScan: lineitem projection=[l_partkey, l_quantity] - -DataFusion Physical Plan -======================== - -ProjectionExec: expr=[CAST(sum(lineitem.l_extendedprice)@0 AS Float64) / 7 as avg_yearly] - AggregateExec: mode=Final, gby=[], aggr=[sum(lineitem.l_extendedprice)] - CoalescePartitionsExec - AggregateExec: mode=Partial, gby=[], aggr=[sum(lineitem.l_extendedprice)] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@2, l_partkey@1)], filter=CAST(l_quantity@0 AS Decimal128(30, 15)) < Float64(0.2) * avg(lineitem.l_quantity)@1, projection=[l_extendedprice@1] - ProjectionExec: expr=[l_quantity@1 as l_quantity, l_extendedprice@2 as l_extendedprice, p_partkey@0 as p_partkey] - 
CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, l_partkey@0)], projection=[p_partkey@0, l_quantity@2, l_extendedprice@3] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([p_partkey@0], 4), input_partitions=4 - ProjectionExec: expr=[p_partkey@0 as p_partkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: p_brand@1 = Brand#42 AND p_container@2 = LG BAG - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet:0..3454243, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet:0..61547], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet:61547..3452413, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_0.parquet:0..124924], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_0.parquet:124924..2056321, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_1.parquet:0..1526046, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_2.parquet:0..58347], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_2.parquet:58347..1788444, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_3.parquet:0..1785690]]}, projection=[p_partkey, p_brand, p_container], predicate=p_brand@3 = Brand#42 AND p_container@6 = LG BAG, pruning_predicate=CASE WHEN p_brand_null_count@2 = p_brand_row_count@3 THEN false ELSE p_brand_min@0 <= Brand#42 AND Brand#42 <= p_brand_max@1 END AND CASE WHEN p_container_null_count@6 = p_container_row_count@7 THEN false ELSE p_container_min@4 <= LG BAG AND LG BAG <= p_container_max@5 END, required_guarantees=[p_brand in (Brand#42), p_container in (LG BAG)] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([l_partkey@0], 4), 
input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_partkey, l_quantity, l_extendedprice] - ProjectionExec: expr=[CAST(0.2 * CAST(avg(lineitem.l_quantity)@1 AS Float64) AS Decimal128(30, 15)) as Float64(0.2) * avg(lineitem.l_quantity), l_partkey@0 as l_partkey] - AggregateExec: mode=FinalPartitioned, gby=[l_partkey@0 as l_partkey], aggr=[avg(lineitem.l_quantity)] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([l_partkey@0], 4), input_partitions=4 - AggregateExec: mode=Partial, gby=[l_partkey@0 as l_partkey], aggr=[avg(lineitem.l_quantity)] - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, 
home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_partkey, l_quantity] - -RaySQL Plan -=========== - -Query Stage #0 (4 -> 4): -ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) - ProjectionExec: expr=[p_partkey@0 as p_partkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: p_brand@1 = Brand#42 AND p_container@2 = LG BAG - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet:0..3454243, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet:0..61547], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet:61547..3452413, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_0.parquet:0..124924], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_0.parquet:124924..2056321, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_1.parquet:0..1526046, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_2.parquet:0..58347], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_2.parquet:58347..1788444, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_3.parquet:0..1785690]]}, projection=[p_partkey, p_brand, p_container], 
predicate=p_brand@3 = Brand#42 AND p_container@6 = LG BAG, pruning_predicate=CASE WHEN p_brand_null_count@2 = p_brand_row_count@3 THEN false ELSE p_brand_min@0 <= Brand#42 AND Brand#42 <= p_brand_max@1 END AND CASE WHEN p_container_null_count@6 = p_container_row_count@7 THEN false ELSE p_container_min@4 <= LG BAG AND LG BAG <= p_container_max@5 END, required_guarantees=[p_brand in (Brand#42), p_container in (LG BAG)] - -Query Stage #1 (4 -> 4): -ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "l_partkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_partkey, l_quantity, l_extendedprice] - -Query Stage #2 (4 -> 4): -ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "l_partkey", index: 0 }], 4)) - AggregateExec: mode=Partial, gby=[l_partkey@0 as l_partkey], aggr=[avg(lineitem.l_quantity)] - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, 
home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_partkey, l_quantity] - -Query Stage #3 (4 -> 1): -ShuffleWriterExec(stage_id=3, output_partitioning=Hash([], 4)) - AggregateExec: mode=Partial, gby=[], aggr=[sum(lineitem.l_extendedprice)] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@2, l_partkey@1)], filter=CAST(l_quantity@0 AS Decimal128(30, 15)) < Float64(0.2) * avg(lineitem.l_quantity)@1, projection=[l_extendedprice@1] - ProjectionExec: expr=[l_quantity@1 as l_quantity, l_extendedprice@2 as l_extendedprice, p_partkey@0 as p_partkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, l_partkey@0)], projection=[p_partkey@0, l_quantity@2, l_extendedprice@3] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "l_partkey", index: 0 }], 4)) - ProjectionExec: expr=[CAST(0.2 * CAST(avg(lineitem.l_quantity)@1 AS 
Float64) AS Decimal128(30, 15)) as Float64(0.2) * avg(lineitem.l_quantity), l_partkey@0 as l_partkey] - AggregateExec: mode=FinalPartitioned, gby=[l_partkey@0 as l_partkey], aggr=[avg(lineitem.l_quantity)] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "l_partkey", index: 0 }], 4)) - -Query Stage #4 (1 -> 1): -ProjectionExec: expr=[CAST(sum(lineitem.l_extendedprice)@0 AS Float64) / 7 as avg_yearly] - AggregateExec: mode=Final, gby=[], aggr=[sum(lineitem.l_extendedprice)] - CoalescePartitionsExec - ShuffleReaderExec(stage_id=3, input_partitioning=Hash([], 4)) - diff --git a/testdata/expected-plans/q18.txt b/testdata/expected-plans/q18.txt deleted file mode 100644 index 1952e3b..0000000 --- a/testdata/expected-plans/q18.txt +++ /dev/null @@ -1,115 +0,0 @@ -DataFusion Logical Plan -======================= - -Limit: skip=0, fetch=100 - Sort: orders.o_totalprice DESC NULLS FIRST, orders.o_orderdate ASC NULLS LAST, fetch=100 - Aggregate: groupBy=[[customer.c_name, customer.c_custkey, orders.o_orderkey, orders.o_orderdate, orders.o_totalprice]], aggr=[[sum(lineitem.l_quantity)]] - LeftSemi Join: orders.o_orderkey = __correlated_sq_1.l_orderkey - Projection: customer.c_custkey, customer.c_name, orders.o_orderkey, orders.o_totalprice, orders.o_orderdate, lineitem.l_quantity - Inner Join: orders.o_orderkey = lineitem.l_orderkey - Projection: customer.c_custkey, customer.c_name, orders.o_orderkey, orders.o_totalprice, orders.o_orderdate - Inner Join: customer.c_custkey = orders.o_custkey - TableScan: customer projection=[c_custkey, c_name] - TableScan: orders projection=[o_orderkey, o_custkey, o_totalprice, o_orderdate] - TableScan: lineitem projection=[l_orderkey, l_quantity] - SubqueryAlias: __correlated_sq_1 - Projection: lineitem.l_orderkey - Filter: sum(lineitem.l_quantity) > Decimal128(Some(31300),21,2) - Aggregate: groupBy=[[lineitem.l_orderkey]], aggr=[[sum(lineitem.l_quantity)]] - TableScan: 
lineitem projection=[l_orderkey, l_quantity] - -DataFusion Physical Plan -======================== - -GlobalLimitExec: skip=0, fetch=100 - SortPreservingMergeExec: [o_totalprice@4 DESC,o_orderdate@3 ASC NULLS LAST], fetch=100 - SortExec: TopK(fetch=100), expr=[o_totalprice@4 DESC,o_orderdate@3 ASC NULLS LAST], preserve_partitioning=[true] - AggregateExec: mode=FinalPartitioned, gby=[c_name@0 as c_name, c_custkey@1 as c_custkey, o_orderkey@2 as o_orderkey, o_orderdate@3 as o_orderdate, o_totalprice@4 as o_totalprice], aggr=[sum(lineitem.l_quantity)] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([c_name@0, c_custkey@1, o_orderkey@2, o_orderdate@3, o_totalprice@4], 4), input_partitions=4 - AggregateExec: mode=Partial, gby=[c_name@1 as c_name, c_custkey@0 as c_custkey, o_orderkey@2 as o_orderkey, o_orderdate@4 as o_orderdate, o_totalprice@3 as o_totalprice], aggr=[sum(lineitem.l_quantity)] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=RightSemi, on=[(l_orderkey@0, o_orderkey@2)] - ProjectionExec: expr=[l_orderkey@0 as l_orderkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: sum(lineitem.l_quantity)@1 > Some(31300),21,2 - AggregateExec: mode=FinalPartitioned, gby=[l_orderkey@0 as l_orderkey], aggr=[sum(lineitem.l_quantity)] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4 - AggregateExec: mode=Partial, gby=[l_orderkey@0 as l_orderkey], aggr=[sum(lineitem.l_quantity)] - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, 
home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_orderkey, l_quantity] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@2, l_orderkey@0)], projection=[c_custkey@0, c_name@1, o_orderkey@2, o_totalprice@3, o_orderdate@4, l_quantity@6] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([o_orderkey@2], 4), input_partitions=4 - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, c_name@1, o_orderkey@2, o_totalprice@4, o_orderdate@5] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([c_custkey@0], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_0.parquet:0..3472684, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_1.parquet:0..3399474], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_1.parquet:3399474..3842490, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_2.parquet:0..3498085, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_3.parquet:0..2931057], 
[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_3.parquet:2931057..3110748, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..6692467], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6692467..6784093, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..6780531]]}, projection=[c_custkey, c_name] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([o_custkey@1], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_0.parquet:0..13941205, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:0..13831262], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:13831262..13895674, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_2.parquet:0..14344446, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:0..13363609], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:13363609..14369581, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..26766495], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:26766495..27275418, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..27263542]]}, projection=[o_orderkey, o_custkey, o_totalprice, o_orderdate] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], 
[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_orderkey, l_quantity] - -RaySQL Plan -=========== - -Query Stage #0 (4 -> 4): -ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) - AggregateExec: mode=Partial, gby=[l_orderkey@0 as l_orderkey], aggr=[sum(lineitem.l_quantity)] - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, 
home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_orderkey, l_quantity] - -Query Stage #1 (4 -> 4): -ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_0.parquet:0..3472684, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_1.parquet:0..3399474], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_1.parquet:3399474..3842490, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_2.parquet:0..3498085, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_3.parquet:0..2931057], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_3.parquet:2931057..3110748, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..6692467], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6692467..6784093, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..6780531]]}, projection=[c_custkey, c_name] - -Query Stage #2 (4 -> 4): -ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "o_custkey", index: 1 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_0.parquet:0..13941205, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:0..13831262], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:13831262..13895674, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_2.parquet:0..14344446, 
home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:0..13363609], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:13363609..14369581, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..26766495], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:26766495..27275418, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..27263542]]}, projection=[o_orderkey, o_custkey, o_totalprice, o_orderdate] - -Query Stage #3 (4 -> 4): -ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "o_orderkey", index: 2 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, c_name@1, o_orderkey@2, o_totalprice@4, o_orderdate@5] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "o_custkey", index: 1 }], 4)) - -Query Stage #4 (4 -> 4): -ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], 
[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_orderkey, l_quantity] - -Query Stage #5 (4 -> 4): -ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "c_name", index: 0 }, Column { name: "c_custkey", index: 1 }, Column { name: "o_orderkey", index: 2 }, Column { name: "o_orderdate", index: 3 }, Column { name: "o_totalprice", index: 4 }], 4)) - AggregateExec: mode=Partial, gby=[c_name@1 as c_name, c_custkey@0 as c_custkey, o_orderkey@2 as o_orderkey, o_orderdate@4 as o_orderdate, o_totalprice@3 as o_totalprice], aggr=[sum(lineitem.l_quantity)] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=RightSemi, on=[(l_orderkey@0, o_orderkey@2)] - ProjectionExec: expr=[l_orderkey@0 as l_orderkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: sum(lineitem.l_quantity)@1 > Some(31300),21,2 - AggregateExec: mode=FinalPartitioned, gby=[l_orderkey@0 as l_orderkey], aggr=[sum(lineitem.l_quantity)] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@2, l_orderkey@0)], projection=[c_custkey@0, c_name@1, o_orderkey@2, o_totalprice@3, o_orderdate@4, l_quantity@6] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "o_orderkey", index: 2 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=4, input_partitioning=Hash([Column { name: 
"l_orderkey", index: 0 }], 4)) - -Query Stage #6 (4 -> 4): -ShuffleWriterExec(stage_id=6, output_partitioning=Hash([Column { name: "c_name", index: 0 }, Column { name: "c_custkey", index: 1 }, Column { name: "o_orderkey", index: 2 }, Column { name: "o_orderdate", index: 3 }, Column { name: "o_totalprice", index: 4 }], 4)) - SortExec: TopK(fetch=100), expr=[o_totalprice@4 DESC,o_orderdate@3 ASC NULLS LAST], preserve_partitioning=[true] - AggregateExec: mode=FinalPartitioned, gby=[c_name@0 as c_name, c_custkey@1 as c_custkey, o_orderkey@2 as o_orderkey, o_orderdate@3 as o_orderdate, o_totalprice@4 as o_totalprice], aggr=[sum(lineitem.l_quantity)] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=5, input_partitioning=Hash([Column { name: "c_name", index: 0 }, Column { name: "c_custkey", index: 1 }, Column { name: "o_orderkey", index: 2 }, Column { name: "o_orderdate", index: 3 }, Column { name: "o_totalprice", index: 4 }], 4)) - -Query Stage #7 (1 -> 1): -GlobalLimitExec: skip=0, fetch=100 - SortPreservingMergeExec: [o_totalprice@4 DESC,o_orderdate@3 ASC NULLS LAST], fetch=100 - ShuffleReaderExec(stage_id=6, input_partitioning=Hash([Column { name: "c_name", index: 0 }, Column { name: "c_custkey", index: 1 }, Column { name: "o_orderkey", index: 2 }, Column { name: "o_orderdate", index: 3 }, Column { name: "o_totalprice", index: 4 }], 4)) - diff --git a/testdata/expected-plans/q19.txt b/testdata/expected-plans/q19.txt deleted file mode 100644 index de5b3a8..0000000 --- a/testdata/expected-plans/q19.txt +++ /dev/null @@ -1,66 +0,0 @@ -DataFusion Logical Plan -======================= - -Projection: sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS revenue - Aggregate: groupBy=[[]], aggr=[[sum(lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - lineitem.l_discount)) AS sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]] - Projection: lineitem.l_extendedprice, lineitem.l_discount - Inner Join: lineitem.l_partkey = 
part.p_partkey Filter: part.p_brand = Utf8("Brand#21") AND part.p_container IN ([Utf8("SM CASE"), Utf8("SM BOX"), Utf8("SM PACK"), Utf8("SM PKG")]) AND lineitem.l_quantity >= Decimal128(Some(800),11,2) AND lineitem.l_quantity <= Decimal128(Some(1800),11,2) AND part.p_size <= Int32(5) OR part.p_brand = Utf8("Brand#13") AND part.p_container IN ([Utf8("MED BAG"), Utf8("MED BOX"), Utf8("MED PKG"), Utf8("MED PACK")]) AND lineitem.l_quantity >= Decimal128(Some(2000),11,2) AND lineitem.l_quantity <= Decimal128(Some(3000),11,2) AND part.p_size <= Int32(10) OR part.p_brand = Utf8("Brand#52") AND part.p_container IN ([Utf8("LG CASE"), Utf8("LG BOX"), Utf8("LG PACK"), Utf8("LG PKG")]) AND lineitem.l_quantity >= Decimal128(Some(3000),11,2) AND lineitem.l_quantity <= Decimal128(Some(4000),11,2) AND part.p_size <= Int32(15) - Projection: lineitem.l_partkey, lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount - Filter: (lineitem.l_quantity >= Decimal128(Some(800),11,2) AND lineitem.l_quantity <= Decimal128(Some(1800),11,2) OR lineitem.l_quantity >= Decimal128(Some(2000),11,2) AND lineitem.l_quantity <= Decimal128(Some(3000),11,2) OR lineitem.l_quantity >= Decimal128(Some(3000),11,2) AND lineitem.l_quantity <= Decimal128(Some(4000),11,2)) AND (lineitem.l_shipmode = Utf8("AIR") OR lineitem.l_shipmode = Utf8("AIR REG")) AND lineitem.l_shipinstruct = Utf8("DELIVER IN PERSON") - TableScan: lineitem projection=[l_partkey, l_quantity, l_extendedprice, l_discount, l_shipinstruct, l_shipmode], partial_filters=[lineitem.l_shipmode = Utf8("AIR") OR lineitem.l_shipmode = Utf8("AIR REG"), lineitem.l_shipinstruct = Utf8("DELIVER IN PERSON"), lineitem.l_quantity >= Decimal128(Some(800),11,2) AND lineitem.l_quantity <= Decimal128(Some(1800),11,2) OR lineitem.l_quantity >= Decimal128(Some(2000),11,2) AND lineitem.l_quantity <= Decimal128(Some(3000),11,2) OR lineitem.l_quantity >= Decimal128(Some(3000),11,2) AND lineitem.l_quantity <= Decimal128(Some(4000),11,2)] - Filter: 
(part.p_brand = Utf8("Brand#21") AND part.p_container IN ([Utf8("SM CASE"), Utf8("SM BOX"), Utf8("SM PACK"), Utf8("SM PKG")]) AND part.p_size <= Int32(5) OR part.p_brand = Utf8("Brand#13") AND part.p_container IN ([Utf8("MED BAG"), Utf8("MED BOX"), Utf8("MED PKG"), Utf8("MED PACK")]) AND part.p_size <= Int32(10) OR part.p_brand = Utf8("Brand#52") AND part.p_container IN ([Utf8("LG CASE"), Utf8("LG BOX"), Utf8("LG PACK"), Utf8("LG PKG")]) AND part.p_size <= Int32(15)) AND part.p_size >= Int32(1) - TableScan: part projection=[p_partkey, p_brand, p_size, p_container], partial_filters=[part.p_size >= Int32(1), part.p_brand = Utf8("Brand#21") AND part.p_container IN ([Utf8("SM CASE"), Utf8("SM BOX"), Utf8("SM PACK"), Utf8("SM PKG")]) AND part.p_size <= Int32(5) OR part.p_brand = Utf8("Brand#13") AND part.p_container IN ([Utf8("MED BAG"), Utf8("MED BOX"), Utf8("MED PKG"), Utf8("MED PACK")]) AND part.p_size <= Int32(10) OR part.p_brand = Utf8("Brand#52") AND part.p_container IN ([Utf8("LG CASE"), Utf8("LG BOX"), Utf8("LG PACK"), Utf8("LG PKG")]) AND part.p_size <= Int32(15)] - -DataFusion Physical Plan -======================== - -ProjectionExec: expr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@0 as revenue] - AggregateExec: mode=Final, gby=[], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] - CoalescePartitionsExec - AggregateExec: mode=Partial, gby=[], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, l_partkey@0)], filter=p_brand@1 = Brand#21 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("SM CASE") }, Literal { value: Utf8("SM BOX") }, Literal { value: Utf8("SM PACK") }, Literal { value: Utf8("SM PKG") }]) AND l_quantity@0 >= Some(800),11,2 AND l_quantity@0 <= Some(1800),11,2 AND p_size@2 <= 5 OR p_brand@1 = Brand#13 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("MED BAG") }, 
Literal { value: Utf8("MED BOX") }, Literal { value: Utf8("MED PKG") }, Literal { value: Utf8("MED PACK") }]) AND l_quantity@0 >= Some(2000),11,2 AND l_quantity@0 <= Some(3000),11,2 AND p_size@2 <= 10 OR p_brand@1 = Brand#52 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("LG CASE") }, Literal { value: Utf8("LG BOX") }, Literal { value: Utf8("LG PACK") }, Literal { value: Utf8("LG PKG") }]) AND l_quantity@0 >= Some(3000),11,2 AND l_quantity@0 <= Some(4000),11,2 AND p_size@2 <= 15, projection=[l_extendedprice@6, l_discount@7] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([p_partkey@0], 4), input_partitions=4 - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: (p_brand@1 = Brand#21 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("SM CASE") }, Literal { value: Utf8("SM BOX") }, Literal { value: Utf8("SM PACK") }, Literal { value: Utf8("SM PKG") }]) AND p_size@2 <= 5 OR p_brand@1 = Brand#13 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("MED BAG") }, Literal { value: Utf8("MED BOX") }, Literal { value: Utf8("MED PKG") }, Literal { value: Utf8("MED PACK") }]) AND p_size@2 <= 10 OR p_brand@1 = Brand#52 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("LG CASE") }, Literal { value: Utf8("LG BOX") }, Literal { value: Utf8("LG PACK") }, Literal { value: Utf8("LG PKG") }]) AND p_size@2 <= 15) AND p_size@2 >= 1 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet:0..3454243, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet:0..61547], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet:61547..3452413, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_0.parquet:0..124924], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_0.parquet:124924..2056321, 
home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_1.parquet:0..1526046, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_2.parquet:0..58347], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_2.parquet:58347..1788444, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_3.parquet:0..1785690]]}, projection=[p_partkey, p_brand, p_size, p_container], predicate=p_size@5 >= 1 AND (p_brand@3 = Brand#21 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("SM CASE") }, Literal { value: Utf8("SM BOX") }, Literal { value: Utf8("SM PACK") }, Literal { value: Utf8("SM PKG") }]) AND p_size@5 <= 5 OR p_brand@3 = Brand#13 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("MED BAG") }, Literal { value: Utf8("MED BOX") }, Literal { value: Utf8("MED PKG") }, Literal { value: Utf8("MED PACK") }]) AND p_size@5 <= 10 OR p_brand@3 = Brand#52 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("LG CASE") }, Literal { value: Utf8("LG BOX") }, Literal { value: Utf8("LG PACK") }, Literal { value: Utf8("LG PKG") }]) AND p_size@5 <= 15), pruning_predicate=CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_max@0 >= 1 END AND (CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#21 AND Brand#21 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM CASE AND SM CASE <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM BOX AND SM BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM PACK AND SM PACK <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM PKG AND SM PKG <= p_container_max@8 END) AND 
CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 5 END OR CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#13 AND Brand#13 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED BAG AND MED BAG <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED BOX AND MED BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED PKG AND MED PKG <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED PACK AND MED PACK <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 10 END OR CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#52 AND Brand#52 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG CASE AND LG CASE <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG BOX AND LG BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG PACK AND LG PACK <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG PKG AND LG PKG <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 15 END), required_guarantees=[] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([l_partkey@0], 4), input_partitions=4 - ProjectionExec: expr=[l_partkey@0 as l_partkey, l_quantity@1 as l_quantity, l_extendedprice@2 as l_extendedprice, l_discount@3 
as l_discount] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: (l_quantity@1 >= Some(800),11,2 AND l_quantity@1 <= Some(1800),11,2 OR l_quantity@1 >= Some(2000),11,2 AND l_quantity@1 <= Some(3000),11,2 OR l_quantity@1 >= Some(3000),11,2 AND l_quantity@1 <= Some(4000),11,2) AND (l_shipmode@5 = AIR OR l_shipmode@5 = AIR REG) AND l_shipinstruct@4 = DELIVER IN PERSON - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_partkey, l_quantity, l_extendedprice, l_discount, l_shipinstruct, l_shipmode], predicate=(l_shipmode@14 = AIR OR l_shipmode@14 = AIR REG) AND l_shipinstruct@13 = DELIVER IN PERSON AND (l_quantity@4 >= Some(800),11,2 AND l_quantity@4 <= Some(1800),11,2 OR l_quantity@4 >= Some(2000),11,2 AND l_quantity@4 <= Some(3000),11,2 OR l_quantity@4 >= Some(3000),11,2 AND l_quantity@4 <= Some(4000),11,2), pruning_predicate=(CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= AIR AND AIR <= l_shipmode_max@1 END OR CASE WHEN l_shipmode_null_count@2 = 
l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= AIR REG AND AIR REG <= l_shipmode_max@1 END) AND CASE WHEN l_shipinstruct_null_count@6 = l_shipinstruct_row_count@7 THEN false ELSE l_shipinstruct_min@4 <= DELIVER IN PERSON AND DELIVER IN PERSON <= l_shipinstruct_max@5 END AND (CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(800),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(1800),11,2 END OR CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(2000),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(3000),11,2 END OR CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(3000),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(4000),11,2 END), required_guarantees=[l_shipmode in (AIR REG, AIR), l_shipinstruct in (DELIVER IN PERSON)] - -RaySQL Plan -=========== - -Query Stage #0 (4 -> 4): -ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: (p_brand@1 = Brand#21 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("SM CASE") }, Literal { value: Utf8("SM BOX") }, Literal { value: Utf8("SM PACK") }, Literal { value: Utf8("SM PKG") }]) AND p_size@2 <= 5 OR p_brand@1 = Brand#13 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("MED BAG") }, Literal { value: Utf8("MED BOX") }, Literal { value: Utf8("MED PKG") }, Literal { value: Utf8("MED PACK") }]) AND p_size@2 <= 10 OR p_brand@1 = Brand#52 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("LG CASE") }, Literal { value: Utf8("LG BOX") }, Literal { value: Utf8("LG PACK") }, Literal { value: Utf8("LG PKG") }]) AND p_size@2 <= 15) AND p_size@2 >= 1 - 
ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet:0..3454243, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet:0..61547], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet:61547..3452413, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_0.parquet:0..124924], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_0.parquet:124924..2056321, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_1.parquet:0..1526046, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_2.parquet:0..58347], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_2.parquet:58347..1788444, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_3.parquet:0..1785690]]}, projection=[p_partkey, p_brand, p_size, p_container], predicate=p_size@5 >= 1 AND (p_brand@3 = Brand#21 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("SM CASE") }, Literal { value: Utf8("SM BOX") }, Literal { value: Utf8("SM PACK") }, Literal { value: Utf8("SM PKG") }]) AND p_size@5 <= 5 OR p_brand@3 = Brand#13 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("MED BAG") }, Literal { value: Utf8("MED BOX") }, Literal { value: Utf8("MED PKG") }, Literal { value: Utf8("MED PACK") }]) AND p_size@5 <= 10 OR p_brand@3 = Brand#52 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("LG CASE") }, Literal { value: Utf8("LG BOX") }, Literal { value: Utf8("LG PACK") }, Literal { value: Utf8("LG PKG") }]) AND p_size@5 <= 15), pruning_predicate=CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_max@0 >= 1 END AND (CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#21 AND Brand#21 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = 
p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM CASE AND SM CASE <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM BOX AND SM BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM PACK AND SM PACK <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM PKG AND SM PKG <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 5 END OR CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#13 AND Brand#13 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED BAG AND MED BAG <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED BOX AND MED BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED PKG AND MED PKG <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED PACK AND MED PACK <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 10 END OR CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#52 AND Brand#52 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG CASE AND LG CASE <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG BOX AND LG BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG PACK AND 
LG PACK <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG PKG AND LG PKG <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 15 END), required_guarantees=[] - -Query Stage #1 (4 -> 4): -ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "l_partkey", index: 0 }], 4)) - ProjectionExec: expr=[l_partkey@0 as l_partkey, l_quantity@1 as l_quantity, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: (l_quantity@1 >= Some(800),11,2 AND l_quantity@1 <= Some(1800),11,2 OR l_quantity@1 >= Some(2000),11,2 AND l_quantity@1 <= Some(3000),11,2 OR l_quantity@1 >= Some(3000),11,2 AND l_quantity@1 <= Some(4000),11,2) AND (l_shipmode@5 = AIR OR l_shipmode@5 = AIR REG) AND l_shipinstruct@4 = DELIVER IN PERSON - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_partkey, l_quantity, 
l_extendedprice, l_discount, l_shipinstruct, l_shipmode], predicate=(l_shipmode@14 = AIR OR l_shipmode@14 = AIR REG) AND l_shipinstruct@13 = DELIVER IN PERSON AND (l_quantity@4 >= Some(800),11,2 AND l_quantity@4 <= Some(1800),11,2 OR l_quantity@4 >= Some(2000),11,2 AND l_quantity@4 <= Some(3000),11,2 OR l_quantity@4 >= Some(3000),11,2 AND l_quantity@4 <= Some(4000),11,2), pruning_predicate=(CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= AIR AND AIR <= l_shipmode_max@1 END OR CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= AIR REG AND AIR REG <= l_shipmode_max@1 END) AND CASE WHEN l_shipinstruct_null_count@6 = l_shipinstruct_row_count@7 THEN false ELSE l_shipinstruct_min@4 <= DELIVER IN PERSON AND DELIVER IN PERSON <= l_shipinstruct_max@5 END AND (CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(800),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(1800),11,2 END OR CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(2000),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(3000),11,2 END OR CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(3000),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(4000),11,2 END), required_guarantees=[l_shipmode in (AIR REG, AIR), l_shipinstruct in (DELIVER IN PERSON)] - -Query Stage #2 (4 -> 1): -ShuffleWriterExec(stage_id=2, output_partitioning=Hash([], 4)) - AggregateExec: mode=Partial, gby=[], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, l_partkey@0)], 
filter=p_brand@1 = Brand#21 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("SM CASE") }, Literal { value: Utf8("SM BOX") }, Literal { value: Utf8("SM PACK") }, Literal { value: Utf8("SM PKG") }]) AND l_quantity@0 >= Some(800),11,2 AND l_quantity@0 <= Some(1800),11,2 AND p_size@2 <= 5 OR p_brand@1 = Brand#13 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("MED BAG") }, Literal { value: Utf8("MED BOX") }, Literal { value: Utf8("MED PKG") }, Literal { value: Utf8("MED PACK") }]) AND l_quantity@0 >= Some(2000),11,2 AND l_quantity@0 <= Some(3000),11,2 AND p_size@2 <= 10 OR p_brand@1 = Brand#52 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("LG CASE") }, Literal { value: Utf8("LG BOX") }, Literal { value: Utf8("LG PACK") }, Literal { value: Utf8("LG PKG") }]) AND l_quantity@0 >= Some(3000),11,2 AND l_quantity@0 <= Some(4000),11,2 AND p_size@2 <= 15, projection=[l_extendedprice@6, l_discount@7] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "l_partkey", index: 0 }], 4)) - -Query Stage #3 (1 -> 1): -ProjectionExec: expr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@0 as revenue] - AggregateExec: mode=Final, gby=[], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] - CoalescePartitionsExec - ShuffleReaderExec(stage_id=2, input_partitioning=Hash([], 4)) - diff --git a/testdata/expected-plans/q2.txt b/testdata/expected-plans/q2.txt deleted file mode 100644 index e19e372..0000000 --- a/testdata/expected-plans/q2.txt +++ /dev/null @@ -1,264 +0,0 @@ -DataFusion Logical Plan -======================= - -Limit: skip=0, fetch=100 - Sort: supplier.s_acctbal DESC NULLS FIRST, nation.n_name ASC NULLS LAST, supplier.s_name ASC NULLS LAST, part.p_partkey ASC NULLS LAST, fetch=100 - Projection: supplier.s_acctbal, 
supplier.s_name, nation.n_name, part.p_partkey, part.p_mfgr, supplier.s_address, supplier.s_phone, supplier.s_comment - Inner Join: part.p_partkey = __scalar_sq_1.ps_partkey, partsupp.ps_supplycost = __scalar_sq_1.min(partsupp.ps_supplycost) - Projection: part.p_partkey, part.p_mfgr, supplier.s_name, supplier.s_address, supplier.s_phone, supplier.s_acctbal, supplier.s_comment, partsupp.ps_supplycost, nation.n_name - Inner Join: nation.n_regionkey = region.r_regionkey - Projection: part.p_partkey, part.p_mfgr, supplier.s_name, supplier.s_address, supplier.s_phone, supplier.s_acctbal, supplier.s_comment, partsupp.ps_supplycost, nation.n_name, nation.n_regionkey - Inner Join: supplier.s_nationkey = nation.n_nationkey - Projection: part.p_partkey, part.p_mfgr, supplier.s_name, supplier.s_address, supplier.s_nationkey, supplier.s_phone, supplier.s_acctbal, supplier.s_comment, partsupp.ps_supplycost - Inner Join: partsupp.ps_suppkey = supplier.s_suppkey - Projection: part.p_partkey, part.p_mfgr, partsupp.ps_suppkey, partsupp.ps_supplycost - Inner Join: part.p_partkey = partsupp.ps_partkey - Projection: part.p_partkey, part.p_mfgr - Filter: part.p_size = Int32(48) AND part.p_type LIKE Utf8("%TIN") - TableScan: part projection=[p_partkey, p_mfgr, p_type, p_size], partial_filters=[part.p_size = Int32(48), part.p_type LIKE Utf8("%TIN")] - TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_supplycost] - TableScan: supplier projection=[s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment] - TableScan: nation projection=[n_nationkey, n_name, n_regionkey] - Projection: region.r_regionkey - Filter: region.r_name = Utf8("ASIA") - TableScan: region projection=[r_regionkey, r_name], partial_filters=[region.r_name = Utf8("ASIA")] - SubqueryAlias: __scalar_sq_1 - Projection: min(partsupp.ps_supplycost), partsupp.ps_partkey - Aggregate: groupBy=[[partsupp.ps_partkey]], aggr=[[min(partsupp.ps_supplycost)]] - Projection: partsupp.ps_partkey, 
partsupp.ps_supplycost - Inner Join: nation.n_regionkey = region.r_regionkey - Projection: partsupp.ps_partkey, partsupp.ps_supplycost, nation.n_regionkey - Inner Join: supplier.s_nationkey = nation.n_nationkey - Projection: partsupp.ps_partkey, partsupp.ps_supplycost, supplier.s_nationkey - Inner Join: partsupp.ps_suppkey = supplier.s_suppkey - TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_supplycost] - TableScan: supplier projection=[s_suppkey, s_nationkey] - TableScan: nation projection=[n_nationkey, n_regionkey] - Projection: region.r_regionkey - Filter: region.r_name = Utf8("ASIA") - TableScan: region projection=[r_regionkey, r_name], partial_filters=[region.r_name = Utf8("ASIA")] - -DataFusion Physical Plan -======================== - -GlobalLimitExec: skip=0, fetch=100 - SortPreservingMergeExec: [s_acctbal@0 DESC,n_name@2 ASC NULLS LAST,s_name@1 ASC NULLS LAST,p_partkey@3 ASC NULLS LAST], fetch=100 - SortExec: TopK(fetch=100), expr=[s_acctbal@0 DESC,n_name@2 ASC NULLS LAST,s_name@1 ASC NULLS LAST,p_partkey@3 ASC NULLS LAST], preserve_partitioning=[true] - ProjectionExec: expr=[s_acctbal@5 as s_acctbal, s_name@2 as s_name, n_name@7 as n_name, p_partkey@0 as p_partkey, p_mfgr@1 as p_mfgr, s_address@3 as s_address, s_phone@4 as s_phone, s_comment@6 as s_comment] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, ps_partkey@1), (ps_supplycost@7, min(partsupp.ps_supplycost)@0)], projection=[p_partkey@0, p_mfgr@1, s_name@2, s_address@3, s_phone@4, s_acctbal@5, s_comment@6, n_name@8] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([p_partkey@0, ps_supplycost@7], 4), input_partitions=4 - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(r_regionkey@0, n_regionkey@9)], projection=[p_partkey@1, p_mfgr@2, s_name@3, s_address@4, s_phone@5, s_acctbal@6, s_comment@7, ps_supplycost@8, n_name@9] - 
CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([r_regionkey@0], 4), input_partitions=4 - RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 - ProjectionExec: expr=[r_regionkey@0 as r_regionkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: r_name@1 = ASIA - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet/8NlZWnksIQjnYDPs_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet/part1.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = ASIA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= ASIA AND ASIA <= r_name_max@1 END, required_guarantees=[r_name in (ASIA)] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([n_regionkey@9], 4), input_partitions=4 - ProjectionExec: expr=[p_partkey@2 as p_partkey, p_mfgr@3 as p_mfgr, s_name@4 as s_name, s_address@5 as s_address, s_phone@6 as s_phone, s_acctbal@7 as s_acctbal, s_comment@8 as s_comment, ps_supplycost@9 as ps_supplycost, n_name@0 as n_name, n_regionkey@1 as n_regionkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@4)], projection=[n_name@1, n_regionkey@2, p_partkey@3, p_mfgr@4, s_name@5, s_address@6, s_phone@8, s_acctbal@9, s_comment@10, ps_supplycost@11] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=2 - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/IftthSxZMytggJzA_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name, n_regionkey] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([s_nationkey@4], 4), input_partitions=4 - ProjectionExec: expr=[p_partkey@6 
as p_partkey, p_mfgr@7 as p_mfgr, s_name@0 as s_name, s_address@1 as s_address, s_nationkey@2 as s_nationkey, s_phone@3 as s_phone, s_acctbal@4 as s_acctbal, s_comment@5 as s_comment, ps_supplycost@8 as ps_supplycost] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@2)], projection=[s_name@1, s_address@2, s_nationkey@3, s_phone@4, s_acctbal@5, s_comment@6, p_partkey@7, p_mfgr@8, ps_supplycost@10] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([ps_suppkey@2], 4), input_partitions=4 - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, ps_partkey@0)], projection=[p_partkey@0, p_mfgr@1, ps_suppkey@3, ps_supplycost@4] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([p_partkey@0], 4), input_partitions=4 - ProjectionExec: expr=[p_partkey@0 as p_partkey, p_mfgr@1 as p_mfgr] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: p_size@3 = 48 AND p_type@2 LIKE %TIN - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet:0..3454243, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet:0..61547], 
[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet:61547..3452413, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_0.parquet:0..124924], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_0.parquet:124924..2056321, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_1.parquet:0..1526046, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_2.parquet:0..58347], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_2.parquet:58347..1788444, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_3.parquet:0..1785690]]}, projection=[p_partkey, p_mfgr, p_type, p_size], predicate=p_size@5 = 48 AND p_type@4 LIKE %TIN, pruning_predicate=CASE WHEN p_size_null_count@2 = p_size_row_count@3 THEN false ELSE p_size_min@0 <= 48 AND 48 <= p_size_max@1 END, required_guarantees=[p_size in (48)] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([ps_partkey@0], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_0.parquet:0..11328263, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_1.parquet:0..9740046], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_1.parquet:9740046..10585781, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_2.parquet:0..9946761, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_3.parquet:0..10275813], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_3.parquet:10275813..10620507, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..20723615], 
[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:20723615..20895812, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..20896109]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([ps_partkey@1, min(partsupp.ps_supplycost)@0], 4), input_partitions=4 - ProjectionExec: expr=[min(partsupp.ps_supplycost)@1 as min(partsupp.ps_supplycost), ps_partkey@0 as ps_partkey] - AggregateExec: mode=FinalPartitioned, gby=[ps_partkey@0 as ps_partkey], aggr=[min(partsupp.ps_supplycost)] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([ps_partkey@0], 4), input_partitions=4 - AggregateExec: mode=Partial, gby=[ps_partkey@0 as ps_partkey], aggr=[min(partsupp.ps_supplycost)] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(r_regionkey@0, n_regionkey@2)], projection=[ps_partkey@1, ps_supplycost@2] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([r_regionkey@0], 4), input_partitions=4 - RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 - ProjectionExec: expr=[r_regionkey@0 as r_regionkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: r_name@1 = ASIA - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet/8NlZWnksIQjnYDPs_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet/part1.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = ASIA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= ASIA AND ASIA <= r_name_max@1 END, required_guarantees=[r_name in (ASIA)] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([n_regionkey@2], 4), input_partitions=4 - ProjectionExec: expr=[ps_partkey@1 as ps_partkey, 
ps_supplycost@2 as ps_supplycost, n_regionkey@0 as n_regionkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[n_regionkey@1, ps_partkey@2, ps_supplycost@3] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=2 - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/IftthSxZMytggJzA_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_regionkey] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([s_nationkey@2], 4), input_partitions=4 - ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_supplycost@2 as ps_supplycost, s_nationkey@0 as s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@1)], projection=[s_nationkey@1, ps_partkey@2, ps_supplycost@4] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([ps_suppkey@1], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_0.parquet:0..11328263, 
home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_1.parquet:0..9740046], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_1.parquet:9740046..10585781, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_2.parquet:0..9946761, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_3.parquet:0..10275813], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_3.parquet:10275813..10620507, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..20723615], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:20723615..20895812, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..20896109]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] - -RaySQL Plan -=========== - -Query Stage #0 (2 -> 4): -ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "r_regionkey", index: 0 }], 4)) - ProjectionExec: expr=[r_regionkey@0 as r_regionkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: r_name@1 = ASIA - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet/8NlZWnksIQjnYDPs_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet/part1.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = ASIA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= ASIA AND ASIA <= r_name_max@1 END, required_guarantees=[r_name in (ASIA)] - -Query Stage #1 (2 -> 4): -ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/IftthSxZMytggJzA_0.parquet], 
[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name, n_regionkey] - -Query Stage #2 (4 -> 4): -ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment] - -Query Stage #3 (4 -> 4): -ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) - ProjectionExec: expr=[p_partkey@0 as p_partkey, p_mfgr@1 as p_mfgr] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: p_size@3 = 48 AND p_type@2 LIKE %TIN - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet:0..3454243, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet:0..61547], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet:61547..3452413, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_0.parquet:0..124924], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_0.parquet:124924..2056321, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_1.parquet:0..1526046, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_2.parquet:0..58347], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_2.parquet:58347..1788444, 
home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_3.parquet:0..1785690]]}, projection=[p_partkey, p_mfgr, p_type, p_size], predicate=p_size@5 = 48 AND p_type@4 LIKE %TIN, pruning_predicate=CASE WHEN p_size_null_count@2 = p_size_row_count@3 THEN false ELSE p_size_min@0 <= 48 AND 48 <= p_size_max@1 END, required_guarantees=[p_size in (48)] - -Query Stage #4 (4 -> 4): -ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_0.parquet:0..11328263, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_1.parquet:0..9740046], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_1.parquet:9740046..10585781, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_2.parquet:0..9946761, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_3.parquet:0..10275813], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_3.parquet:10275813..10620507, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..20723615], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:20723615..20895812, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..20896109]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] - -Query Stage #5 (4 -> 4): -ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "ps_suppkey", index: 2 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, ps_partkey@0)], projection=[p_partkey@0, p_mfgr@1, ps_suppkey@3, ps_supplycost@4] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=3, 
input_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=4, input_partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 4)) - -Query Stage #6 (4 -> 4): -ShuffleWriterExec(stage_id=6, output_partitioning=Hash([Column { name: "s_nationkey", index: 4 }], 4)) - ProjectionExec: expr=[p_partkey@6 as p_partkey, p_mfgr@7 as p_mfgr, s_name@0 as s_name, s_address@1 as s_address, s_nationkey@2 as s_nationkey, s_phone@3 as s_phone, s_acctbal@4 as s_acctbal, s_comment@5 as s_comment, ps_supplycost@8 as ps_supplycost] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@2)], projection=[s_name@1, s_address@2, s_nationkey@3, s_phone@4, s_acctbal@5, s_comment@6, p_partkey@7, p_mfgr@8, ps_supplycost@10] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=5, input_partitioning=Hash([Column { name: "ps_suppkey", index: 2 }], 4)) - -Query Stage #7 (4 -> 4): -ShuffleWriterExec(stage_id=7, output_partitioning=Hash([Column { name: "n_regionkey", index: 9 }], 4)) - ProjectionExec: expr=[p_partkey@2 as p_partkey, p_mfgr@3 as p_mfgr, s_name@4 as s_name, s_address@5 as s_address, s_phone@6 as s_phone, s_acctbal@7 as s_acctbal, s_comment@8 as s_comment, ps_supplycost@9 as ps_supplycost, n_name@0 as n_name, n_regionkey@1 as n_regionkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@4)], projection=[n_name@1, n_regionkey@2, p_partkey@3, p_mfgr@4, s_name@5, s_address@6, s_phone@8, s_acctbal@9, s_comment@10, ps_supplycost@11] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) - 
CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=6, input_partitioning=Hash([Column { name: "s_nationkey", index: 4 }], 4)) - -Query Stage #8 (4 -> 4): -ShuffleWriterExec(stage_id=8, output_partitioning=Hash([Column { name: "p_partkey", index: 0 }, Column { name: "ps_supplycost", index: 7 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(r_regionkey@0, n_regionkey@9)], projection=[p_partkey@1, p_mfgr@2, s_name@3, s_address@4, s_phone@5, s_acctbal@6, s_comment@7, ps_supplycost@8, n_name@9] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "r_regionkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=7, input_partitioning=Hash([Column { name: "n_regionkey", index: 9 }], 4)) - -Query Stage #9 (2 -> 4): -ShuffleWriterExec(stage_id=9, output_partitioning=Hash([Column { name: "r_regionkey", index: 0 }], 4)) - ProjectionExec: expr=[r_regionkey@0 as r_regionkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: r_name@1 = ASIA - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet/8NlZWnksIQjnYDPs_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet/part1.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = ASIA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= ASIA AND ASIA <= r_name_max@1 END, required_guarantees=[r_name in (ASIA)] - -Query Stage #10 (2 -> 4): -ShuffleWriterExec(stage_id=10, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/IftthSxZMytggJzA_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_regionkey] - 
-Query Stage #11 (4 -> 4): -ShuffleWriterExec(stage_id=11, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] - -Query Stage #12 (4 -> 4): -ShuffleWriterExec(stage_id=12, output_partitioning=Hash([Column { name: "ps_suppkey", index: 1 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_0.parquet:0..11328263, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_1.parquet:0..9740046], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_1.parquet:9740046..10585781, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_2.parquet:0..9946761, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_3.parquet:0..10275813], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_3.parquet:10275813..10620507, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..20723615], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:20723615..20895812, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..20896109]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] - -Query Stage #13 (4 -> 4): -ShuffleWriterExec(stage_id=13, output_partitioning=Hash([Column { name: "s_nationkey", index: 2 }], 4)) - ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_supplycost@2 as ps_supplycost, 
s_nationkey@0 as s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@1)], projection=[s_nationkey@1, ps_partkey@2, ps_supplycost@4] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=11, input_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=12, input_partitioning=Hash([Column { name: "ps_suppkey", index: 1 }], 4)) - -Query Stage #14 (4 -> 4): -ShuffleWriterExec(stage_id=14, output_partitioning=Hash([Column { name: "n_regionkey", index: 2 }], 4)) - ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_supplycost@2 as ps_supplycost, n_regionkey@0 as n_regionkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[n_regionkey@1, ps_partkey@2, ps_supplycost@3] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=10, input_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=13, input_partitioning=Hash([Column { name: "s_nationkey", index: 2 }], 4)) - -Query Stage #15 (4 -> 4): -ShuffleWriterExec(stage_id=15, output_partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 4)) - AggregateExec: mode=Partial, gby=[ps_partkey@0 as ps_partkey], aggr=[min(partsupp.ps_supplycost)] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(r_regionkey@0, n_regionkey@2)], projection=[ps_partkey@1, ps_supplycost@2] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=9, input_partitioning=Hash([Column { name: "r_regionkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=14, input_partitioning=Hash([Column { name: "n_regionkey", index: 2 }], 4)) - -Query Stage #16 (4 -> 
4): -ShuffleWriterExec(stage_id=16, output_partitioning=Hash([Column { name: "ps_partkey", index: 1 }, Column { name: "min(partsupp.ps_supplycost)", index: 0 }], 4)) - ProjectionExec: expr=[min(partsupp.ps_supplycost)@1 as min(partsupp.ps_supplycost), ps_partkey@0 as ps_partkey] - AggregateExec: mode=FinalPartitioned, gby=[ps_partkey@0 as ps_partkey], aggr=[min(partsupp.ps_supplycost)] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=15, input_partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 4)) - -Query Stage #17 (4 -> 4): -ShuffleWriterExec(stage_id=17, output_partitioning=Hash([Column { name: "p_partkey", index: 3 }], 4)) - SortExec: TopK(fetch=100), expr=[s_acctbal@0 DESC,n_name@2 ASC NULLS LAST,s_name@1 ASC NULLS LAST,p_partkey@3 ASC NULLS LAST], preserve_partitioning=[true] - ProjectionExec: expr=[s_acctbal@5 as s_acctbal, s_name@2 as s_name, n_name@7 as n_name, p_partkey@0 as p_partkey, p_mfgr@1 as p_mfgr, s_address@3 as s_address, s_phone@4 as s_phone, s_comment@6 as s_comment] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, ps_partkey@1), (ps_supplycost@7, min(partsupp.ps_supplycost)@0)], projection=[p_partkey@0, p_mfgr@1, s_name@2, s_address@3, s_phone@4, s_acctbal@5, s_comment@6, n_name@8] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=8, input_partitioning=Hash([Column { name: "p_partkey", index: 0 }, Column { name: "ps_supplycost", index: 7 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=16, input_partitioning=Hash([Column { name: "ps_partkey", index: 1 }, Column { name: "min(partsupp.ps_supplycost)", index: 0 }], 4)) - -Query Stage #18 (1 -> 1): -GlobalLimitExec: skip=0, fetch=100 - SortPreservingMergeExec: [s_acctbal@0 DESC,n_name@2 ASC NULLS LAST,s_name@1 ASC NULLS LAST,p_partkey@3 ASC NULLS LAST], fetch=100 - ShuffleReaderExec(stage_id=17, input_partitioning=Hash([Column { name: 
"p_partkey", index: 3 }], 4)) - diff --git a/testdata/expected-plans/q20.txt b/testdata/expected-plans/q20.txt deleted file mode 100644 index 4adb69a..0000000 --- a/testdata/expected-plans/q20.txt +++ /dev/null @@ -1,152 +0,0 @@ -DataFusion Logical Plan -======================= - -Sort: supplier.s_name ASC NULLS LAST - Projection: supplier.s_name, supplier.s_address - LeftSemi Join: supplier.s_suppkey = __correlated_sq_1.ps_suppkey - Projection: supplier.s_suppkey, supplier.s_name, supplier.s_address - Inner Join: supplier.s_nationkey = nation.n_nationkey - TableScan: supplier projection=[s_suppkey, s_name, s_address, s_nationkey] - Projection: nation.n_nationkey - Filter: nation.n_name = Utf8("KENYA") - TableScan: nation projection=[n_nationkey, n_name], partial_filters=[nation.n_name = Utf8("KENYA")] - SubqueryAlias: __correlated_sq_1 - Projection: partsupp.ps_suppkey - Inner Join: partsupp.ps_partkey = __scalar_sq_3.l_partkey, partsupp.ps_suppkey = __scalar_sq_3.l_suppkey Filter: CAST(partsupp.ps_availqty AS Float64) > __scalar_sq_3.Float64(0.5) * sum(lineitem.l_quantity) - LeftSemi Join: partsupp.ps_partkey = __correlated_sq_2.p_partkey - TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_availqty] - SubqueryAlias: __correlated_sq_2 - Projection: part.p_partkey - Filter: part.p_name LIKE Utf8("blanched%") - TableScan: part projection=[p_partkey, p_name], partial_filters=[part.p_name LIKE Utf8("blanched%")] - SubqueryAlias: __scalar_sq_3 - Projection: Float64(0.5) * CAST(sum(lineitem.l_quantity) AS Float64), lineitem.l_partkey, lineitem.l_suppkey - Aggregate: groupBy=[[lineitem.l_partkey, lineitem.l_suppkey]], aggr=[[sum(lineitem.l_quantity)]] - Projection: lineitem.l_partkey, lineitem.l_suppkey, lineitem.l_quantity - Filter: lineitem.l_shipdate >= Date32("1993-01-01") AND lineitem.l_shipdate < Date32("1994-01-01") - TableScan: lineitem projection=[l_partkey, l_suppkey, l_quantity, l_shipdate], partial_filters=[lineitem.l_shipdate >= 
Date32("1993-01-01"), lineitem.l_shipdate < Date32("1994-01-01")] - -DataFusion Physical Plan -======================== - -SortPreservingMergeExec: [s_name@0 ASC NULLS LAST] - SortExec: expr=[s_name@0 ASC NULLS LAST], preserve_partitioning=[true] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=LeftSemi, on=[(s_suppkey@0, ps_suppkey@0)], projection=[s_name@1, s_address@2] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=4 - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@3)], projection=[s_suppkey@1, s_name@2, s_address@3] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=4 - RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 - ProjectionExec: expr=[n_nationkey@0 as n_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: n_name@1 = KENYA - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/IftthSxZMytggJzA_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = KENYA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= KENYA AND KENYA <= n_name_max@1 END, required_guarantees=[n_name in (KENYA)] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([s_nationkey@3], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], 
[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_name, s_address, s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([ps_suppkey@0], 4), input_partitions=4 - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(ps_partkey@0, l_partkey@1), (ps_suppkey@1, l_suppkey@2)], filter=CAST(ps_availqty@0 AS Float64) > Float64(0.5) * sum(lineitem.l_quantity)@1, projection=[ps_suppkey@1] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([ps_partkey@0, ps_suppkey@1], 4), input_partitions=4 - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=RightSemi, on=[(p_partkey@0, ps_partkey@0)] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([p_partkey@0], 4), input_partitions=4 - ProjectionExec: expr=[p_partkey@0 as p_partkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: p_name@1 LIKE blanched% - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet:0..3454243, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet:0..61547], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet:61547..3452413, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_0.parquet:0..124924], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_0.parquet:124924..2056321, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_1.parquet:0..1526046, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_2.parquet:0..58347], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_2.parquet:58347..1788444, 
home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_3.parquet:0..1785690]]}, projection=[p_partkey, p_name], predicate=p_name@1 LIKE blanched% - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([ps_partkey@0], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_0.parquet:0..11328263, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_1.parquet:0..9740046], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_1.parquet:9740046..10585781, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_2.parquet:0..9946761, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_3.parquet:0..10275813], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_3.parquet:10275813..10620507, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..20723615], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:20723615..20895812, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..20896109]]}, projection=[ps_partkey, ps_suppkey, ps_availqty] - ProjectionExec: expr=[0.5 * CAST(sum(lineitem.l_quantity)@2 AS Float64) as Float64(0.5) * sum(lineitem.l_quantity), l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey] - AggregateExec: mode=FinalPartitioned, gby=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey], aggr=[sum(lineitem.l_quantity)] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([l_partkey@0, l_suppkey@1], 4), input_partitions=4 - AggregateExec: mode=Partial, gby=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey], aggr=[sum(lineitem.l_quantity)] - ProjectionExec: expr=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey, l_quantity@2 as 
l_quantity] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_shipdate@3 >= 1993-01-01 AND l_shipdate@3 < 1994-01-01 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_partkey, l_suppkey, l_quantity, l_shipdate], predicate=l_shipdate@10 >= 1993-01-01 AND l_shipdate@10 < 1994-01-01, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1993-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1994-01-01 END, required_guarantees=[] - -RaySQL Plan -=========== - -Query Stage #0 (2 -> 4): -ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) - ProjectionExec: expr=[n_nationkey@0 as n_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: n_name@1 = KENYA - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/IftthSxZMytggJzA_0.parquet], 
[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = KENYA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= KENYA AND KENYA <= n_name_max@1 END, required_guarantees=[n_name in (KENYA)] - -Query Stage #1 (4 -> 4): -ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "s_nationkey", index: 3 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_name, s_address, s_nationkey] - -Query Stage #2 (4 -> 4): -ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@3)], projection=[s_suppkey@1, s_name@2, s_address@3] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "s_nationkey", index: 3 }], 4)) - -Query Stage #3 (4 -> 4): -ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) - ProjectionExec: expr=[p_partkey@0 as p_partkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: p_name@1 LIKE blanched% - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet:0..3454243, 
home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet:0..61547], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet:61547..3452413, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_0.parquet:0..124924], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_0.parquet:124924..2056321, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_1.parquet:0..1526046, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_2.parquet:0..58347], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_2.parquet:58347..1788444, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_3.parquet:0..1785690]]}, projection=[p_partkey, p_name], predicate=p_name@1 LIKE blanched% - -Query Stage #4 (4 -> 4): -ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_0.parquet:0..11328263, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_1.parquet:0..9740046], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_1.parquet:9740046..10585781, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_2.parquet:0..9946761, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_3.parquet:0..10275813], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_3.parquet:10275813..10620507, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..20723615], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:20723615..20895812, 
home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..20896109]]}, projection=[ps_partkey, ps_suppkey, ps_availqty] - -Query Stage #5 (4 -> 4): -ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "ps_partkey", index: 0 }, Column { name: "ps_suppkey", index: 1 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=RightSemi, on=[(p_partkey@0, ps_partkey@0)] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=4, input_partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 4)) - -Query Stage #6 (4 -> 4): -ShuffleWriterExec(stage_id=6, output_partitioning=Hash([Column { name: "l_partkey", index: 0 }, Column { name: "l_suppkey", index: 1 }], 4)) - AggregateExec: mode=Partial, gby=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey], aggr=[sum(lineitem.l_quantity)] - ProjectionExec: expr=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey, l_quantity@2 as l_quantity] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_shipdate@3 >= 1993-01-01 AND l_shipdate@3 < 1994-01-01 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, 
home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_partkey, l_suppkey, l_quantity, l_shipdate], predicate=l_shipdate@10 >= 1993-01-01 AND l_shipdate@10 < 1994-01-01, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1993-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1994-01-01 END, required_guarantees=[] - -Query Stage #7 (4 -> 4): -ShuffleWriterExec(stage_id=7, output_partitioning=Hash([Column { name: "ps_suppkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(ps_partkey@0, l_partkey@1), (ps_suppkey@1, l_suppkey@2)], filter=CAST(ps_availqty@0 AS Float64) > Float64(0.5) * sum(lineitem.l_quantity)@1, projection=[ps_suppkey@1] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=5, input_partitioning=Hash([Column { name: "ps_partkey", index: 0 }, Column { name: "ps_suppkey", index: 1 }], 4)) - ProjectionExec: expr=[0.5 * CAST(sum(lineitem.l_quantity)@2 AS Float64) as Float64(0.5) * sum(lineitem.l_quantity), l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey] - AggregateExec: mode=FinalPartitioned, gby=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey], aggr=[sum(lineitem.l_quantity)] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=6, input_partitioning=Hash([Column { name: "l_partkey", index: 0 }, Column { name: "l_suppkey", index: 1 }], 4)) - -Query Stage #8 (4 -> 1): -ShuffleWriterExec(stage_id=8, output_partitioning=Hash([], 4)) - SortExec: expr=[s_name@0 ASC NULLS LAST], preserve_partitioning=[true] - CoalesceBatchesExec: target_batch_size=8192 - 
HashJoinExec: mode=Partitioned, join_type=LeftSemi, on=[(s_suppkey@0, ps_suppkey@0)], projection=[s_name@1, s_address@2] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=7, input_partitioning=Hash([Column { name: "ps_suppkey", index: 0 }], 4)) - -Query Stage #9 (4 -> 1): -SortPreservingMergeExec: [s_name@0 ASC NULLS LAST] - ShuffleReaderExec(stage_id=8, input_partitioning=Hash([], 4)) - diff --git a/testdata/expected-plans/q21.txt b/testdata/expected-plans/q21.txt deleted file mode 100644 index be86399..0000000 --- a/testdata/expected-plans/q21.txt +++ /dev/null @@ -1,188 +0,0 @@ -DataFusion Logical Plan -======================= - -Limit: skip=0, fetch=100 - Sort: numwait DESC NULLS FIRST, supplier.s_name ASC NULLS LAST, fetch=100 - Projection: supplier.s_name, count(*) AS numwait - Aggregate: groupBy=[[supplier.s_name]], aggr=[[count(Int64(1)) AS count(*)]] - Projection: supplier.s_name - LeftAnti Join: l1.l_orderkey = __correlated_sq_2.l_orderkey Filter: __correlated_sq_2.l_suppkey != l1.l_suppkey - LeftSemi Join: l1.l_orderkey = __correlated_sq_1.l_orderkey Filter: __correlated_sq_1.l_suppkey != l1.l_suppkey - Projection: supplier.s_name, l1.l_orderkey, l1.l_suppkey - Inner Join: supplier.s_nationkey = nation.n_nationkey - Projection: supplier.s_name, supplier.s_nationkey, l1.l_orderkey, l1.l_suppkey - Inner Join: l1.l_orderkey = orders.o_orderkey - Projection: supplier.s_name, supplier.s_nationkey, l1.l_orderkey, l1.l_suppkey - Inner Join: supplier.s_suppkey = l1.l_suppkey - TableScan: supplier projection=[s_suppkey, s_name, s_nationkey] - SubqueryAlias: l1 - Projection: lineitem.l_orderkey, lineitem.l_suppkey - Filter: lineitem.l_receiptdate > lineitem.l_commitdate - TableScan: lineitem projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], partial_filters=[lineitem.l_receiptdate 
> lineitem.l_commitdate] - Projection: orders.o_orderkey - Filter: orders.o_orderstatus = Utf8("F") - TableScan: orders projection=[o_orderkey, o_orderstatus], partial_filters=[orders.o_orderstatus = Utf8("F")] - Projection: nation.n_nationkey - Filter: nation.n_name = Utf8("ARGENTINA") - TableScan: nation projection=[n_nationkey, n_name], partial_filters=[nation.n_name = Utf8("ARGENTINA")] - SubqueryAlias: __correlated_sq_1 - SubqueryAlias: l2 - TableScan: lineitem projection=[l_orderkey, l_suppkey] - SubqueryAlias: __correlated_sq_2 - SubqueryAlias: l3 - Projection: lineitem.l_orderkey, lineitem.l_suppkey - Filter: lineitem.l_receiptdate > lineitem.l_commitdate - TableScan: lineitem projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], partial_filters=[lineitem.l_receiptdate > lineitem.l_commitdate] - -DataFusion Physical Plan -======================== - -GlobalLimitExec: skip=0, fetch=100 - SortPreservingMergeExec: [numwait@1 DESC,s_name@0 ASC NULLS LAST], fetch=100 - SortExec: TopK(fetch=100), expr=[numwait@1 DESC,s_name@0 ASC NULLS LAST], preserve_partitioning=[true] - ProjectionExec: expr=[s_name@0 as s_name, count(*)@1 as numwait] - AggregateExec: mode=FinalPartitioned, gby=[s_name@0 as s_name], aggr=[count(*)] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([s_name@0], 4), input_partitions=4 - AggregateExec: mode=Partial, gby=[s_name@0 as s_name], aggr=[count(*)] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=LeftAnti, on=[(l_orderkey@1, l_orderkey@0)], filter=l_suppkey@1 != l_suppkey@0, projection=[s_name@0] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=LeftSemi, on=[(l_orderkey@1, l_orderkey@0)], filter=l_suppkey@1 != l_suppkey@0 - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([l_orderkey@1], 4), input_partitions=4 - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: 
mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@1)], projection=[s_name@1, l_orderkey@3, l_suppkey@4] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=4 - RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 - ProjectionExec: expr=[n_nationkey@0 as n_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: n_name@1 = ARGENTINA - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/IftthSxZMytggJzA_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ARGENTINA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ARGENTINA AND ARGENTINA <= n_name_max@1 END, required_guarantees=[n_name in (ARGENTINA)] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([s_nationkey@1], 4), input_partitions=4 - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@0, l_orderkey@2)], projection=[s_name@1, s_nationkey@2, l_orderkey@3, l_suppkey@4] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([o_orderkey@0], 4), input_partitions=4 - ProjectionExec: expr=[o_orderkey@0 as o_orderkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: o_orderstatus@1 = F - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_0.parquet:0..13941205, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:0..13831262], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:13831262..13895674, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_2.parquet:0..14344446, 
home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:0..13363609], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:13363609..14369581, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..26766495], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:26766495..27275418, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..27263542]]}, projection=[o_orderkey, o_orderstatus], predicate=o_orderstatus@2 = F, pruning_predicate=CASE WHEN o_orderstatus_null_count@2 = o_orderstatus_row_count@3 THEN false ELSE o_orderstatus_min@0 <= F AND F <= o_orderstatus_max@1 END, required_guarantees=[o_orderstatus in (F)] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([l_orderkey@2], 4), input_partitions=4 - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1)], projection=[s_name@1, s_nationkey@2, l_orderkey@3, l_suppkey@4] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_name, s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([l_suppkey@1], 4), input_partitions=4 - ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_suppkey@1 as l_suppkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_receiptdate@3 > l_commitdate@2 - ParquetExec: 
file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, 
home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_orderkey, l_suppkey] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4 - ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_suppkey@1 as l_suppkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_receiptdate@3 > l_commitdate@2 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 - -RaySQL Plan -=========== - -Query Stage #0 (2 -> 4): -ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) - ProjectionExec: expr=[n_nationkey@0 as n_nationkey] - CoalesceBatchesExec: 
target_batch_size=8192 - FilterExec: n_name@1 = ARGENTINA - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/IftthSxZMytggJzA_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ARGENTINA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ARGENTINA AND ARGENTINA <= n_name_max@1 END, required_guarantees=[n_name in (ARGENTINA)] - -Query Stage #1 (4 -> 4): -ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) - ProjectionExec: expr=[o_orderkey@0 as o_orderkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: o_orderstatus@1 = F - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_0.parquet:0..13941205, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:0..13831262], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:13831262..13895674, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_2.parquet:0..14344446, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:0..13363609], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:13363609..14369581, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..26766495], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:26766495..27275418, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..27263542]]}, projection=[o_orderkey, o_orderstatus], predicate=o_orderstatus@2 = F, pruning_predicate=CASE WHEN o_orderstatus_null_count@2 = o_orderstatus_row_count@3 THEN false ELSE o_orderstatus_min@0 <= F AND F <= 
o_orderstatus_max@1 END, required_guarantees=[o_orderstatus in (F)] - -Query Stage #2 (4 -> 4): -ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_name, s_nationkey] - -Query Stage #3 (4 -> 4): -ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "l_suppkey", index: 1 }], 4)) - ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_suppkey@1 as l_suppkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_receiptdate@3 > l_commitdate@2 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_orderkey, 
l_suppkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 - -Query Stage #4 (4 -> 4): -ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "l_orderkey", index: 2 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1)], projection=[s_name@1, s_nationkey@2, l_orderkey@3, l_suppkey@4] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "l_suppkey", index: 1 }], 4)) - -Query Stage #5 (4 -> 4): -ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "s_nationkey", index: 1 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@0, l_orderkey@2)], projection=[s_name@1, s_nationkey@2, l_orderkey@3, l_suppkey@4] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=4, input_partitioning=Hash([Column { name: "l_orderkey", index: 2 }], 4)) - -Query Stage #6 (4 -> 4): -ShuffleWriterExec(stage_id=6, output_partitioning=Hash([Column { name: "l_orderkey", index: 1 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@1)], projection=[s_name@1, l_orderkey@3, l_suppkey@4] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=5, input_partitioning=Hash([Column { name: "s_nationkey", index: 1 }], 4)) - -Query Stage #7 (4 -> 4): 
-ShuffleWriterExec(stage_id=7, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_orderkey, l_suppkey] - -Query Stage #8 (4 -> 4): -ShuffleWriterExec(stage_id=8, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) - ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_suppkey@1 as l_suppkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_receiptdate@3 > l_commitdate@2 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, 
home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 - -Query Stage #9 (4 -> 4): -ShuffleWriterExec(stage_id=9, output_partitioning=Hash([Column { name: "s_name", index: 0 }], 4)) - AggregateExec: mode=Partial, gby=[s_name@0 as s_name], aggr=[count(*)] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=LeftAnti, on=[(l_orderkey@1, l_orderkey@0)], filter=l_suppkey@1 != l_suppkey@0, projection=[s_name@0] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=LeftSemi, on=[(l_orderkey@1, l_orderkey@0)], filter=l_suppkey@1 != l_suppkey@0 - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=6, input_partitioning=Hash([Column { name: "l_orderkey", index: 1 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=7, input_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=8, input_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) - -Query Stage #10 (4 -> 4): -ShuffleWriterExec(stage_id=10, output_partitioning=Hash([Column { name: "s_name", index: 0 }], 4)) - SortExec: TopK(fetch=100), expr=[numwait@1 DESC,s_name@0 ASC NULLS LAST], preserve_partitioning=[true] - ProjectionExec: expr=[s_name@0 as s_name, count(*)@1 as numwait] - AggregateExec: mode=FinalPartitioned, 
gby=[s_name@0 as s_name], aggr=[count(*)] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=9, input_partitioning=Hash([Column { name: "s_name", index: 0 }], 4)) - -Query Stage #11 (1 -> 1): -GlobalLimitExec: skip=0, fetch=100 - SortPreservingMergeExec: [numwait@1 DESC,s_name@0 ASC NULLS LAST], fetch=100 - ShuffleReaderExec(stage_id=10, input_partitioning=Hash([Column { name: "s_name", index: 0 }], 4)) - diff --git a/testdata/expected-plans/q22.txt b/testdata/expected-plans/q22.txt deleted file mode 100644 index 427795a..0000000 --- a/testdata/expected-plans/q22.txt +++ /dev/null @@ -1,99 +0,0 @@ -DataFusion Logical Plan -======================= - -Sort: custsale.cntrycode ASC NULLS LAST - Projection: custsale.cntrycode, count(*) AS numcust, sum(custsale.c_acctbal) AS totacctbal - Aggregate: groupBy=[[custsale.cntrycode]], aggr=[[count(Int64(1)) AS count(*), sum(custsale.c_acctbal)]] - SubqueryAlias: custsale - Projection: substr(customer.c_phone, Int64(1), Int64(2)) AS cntrycode, customer.c_acctbal - Inner Join: Filter: CAST(customer.c_acctbal AS Decimal128(15, 6)) > __scalar_sq_2.avg(customer.c_acctbal) - Projection: customer.c_phone, customer.c_acctbal - LeftAnti Join: customer.c_custkey = __correlated_sq_1.o_custkey - Filter: substr(customer.c_phone, Int64(1), Int64(2)) IN ([Utf8("24"), Utf8("34"), Utf8("16"), Utf8("30"), Utf8("33"), Utf8("14"), Utf8("13")]) - TableScan: customer projection=[c_custkey, c_phone, c_acctbal], partial_filters=[substr(customer.c_phone, Int64(1), Int64(2)) IN ([Utf8("24"), Utf8("34"), Utf8("16"), Utf8("30"), Utf8("33"), Utf8("14"), Utf8("13")])] - SubqueryAlias: __correlated_sq_1 - TableScan: orders projection=[o_custkey] - SubqueryAlias: __scalar_sq_2 - Aggregate: groupBy=[[]], aggr=[[avg(customer.c_acctbal)]] - Projection: customer.c_acctbal - Filter: customer.c_acctbal > Decimal128(Some(0),11,2) AND substr(customer.c_phone, Int64(1), Int64(2)) IN ([Utf8("24"), Utf8("34"), Utf8("16"), Utf8("30"), 
Utf8("33"), Utf8("14"), Utf8("13")]) - TableScan: customer projection=[c_phone, c_acctbal], partial_filters=[customer.c_acctbal > Decimal128(Some(0),11,2) AS customer.c_acctbal > Decimal128(Some(0),30,15), substr(customer.c_phone, Int64(1), Int64(2)) IN ([Utf8("24"), Utf8("34"), Utf8("16"), Utf8("30"), Utf8("33"), Utf8("14"), Utf8("13")]), customer.c_acctbal > Decimal128(Some(0),11,2)] - -DataFusion Physical Plan -======================== - -SortPreservingMergeExec: [cntrycode@0 ASC NULLS LAST] - SortExec: expr=[cntrycode@0 ASC NULLS LAST], preserve_partitioning=[true] - ProjectionExec: expr=[cntrycode@0 as cntrycode, count(*)@1 as numcust, sum(custsale.c_acctbal)@2 as totacctbal] - AggregateExec: mode=FinalPartitioned, gby=[cntrycode@0 as cntrycode], aggr=[count(*), sum(custsale.c_acctbal)] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([cntrycode@0], 4), input_partitions=4 - AggregateExec: mode=Partial, gby=[cntrycode@0 as cntrycode], aggr=[count(*), sum(custsale.c_acctbal)] - ProjectionExec: expr=[substr(c_phone@1, 1, 2) as cntrycode, c_acctbal@2 as c_acctbal] - NestedLoopJoinExec: join_type=Inner, filter=CAST(c_acctbal@0 AS Decimal128(15, 6)) > avg(customer.c_acctbal)@1 - AggregateExec: mode=Final, gby=[], aggr=[avg(customer.c_acctbal)] - CoalescePartitionsExec - AggregateExec: mode=Partial, gby=[], aggr=[avg(customer.c_acctbal)] - ProjectionExec: expr=[c_acctbal@1 as c_acctbal] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: c_acctbal@1 > Some(0),11,2 AND Use substr(c_phone@0, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_0.parquet:0..3472684, 
home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_1.parquet:0..3399474], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_1.parquet:3399474..3842490, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_2.parquet:0..3498085, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_3.parquet:0..2931057], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_3.parquet:2931057..3110748, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..6692467], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6692467..6784093, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..6780531]]}, projection=[c_phone, c_acctbal], predicate=c_acctbal@5 > Some(0),11,2 AND Use substr(c_phone@4, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) AND c_acctbal@5 > Some(0),11,2, pruning_predicate=CASE WHEN c_acctbal_null_count@1 = c_acctbal_row_count@2 THEN false ELSE c_acctbal_max@0 > Some(0),11,2 END AND CASE WHEN c_acctbal_null_count@1 = c_acctbal_row_count@2 THEN false ELSE c_acctbal_max@0 > Some(0),11,2 END, required_guarantees=[] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=LeftAnti, on=[(c_custkey@0, o_custkey@0)], projection=[c_phone@1, c_acctbal@2] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([c_custkey@0], 4), input_partitions=4 - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: Use substr(c_phone@1, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { 
value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_0.parquet:0..3472684, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_1.parquet:0..3399474], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_1.parquet:3399474..3842490, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_2.parquet:0..3498085, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_3.parquet:0..2931057], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_3.parquet:2931057..3110748, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..6692467], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6692467..6784093, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..6780531]]}, projection=[c_custkey, c_phone, c_acctbal], predicate=Use substr(c_phone@4, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([o_custkey@0], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_0.parquet:0..13941205, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:0..13831262], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:13831262..13895674, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_2.parquet:0..14344446, 
home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:0..13363609], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:13363609..14369581, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..26766495], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:26766495..27275418, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..27263542]]}, projection=[o_custkey] - -RaySQL Plan -=========== - -Query Stage #0 (4 -> 1): -ShuffleWriterExec(stage_id=0, output_partitioning=UnknownPartitioning(4)) - AggregateExec: mode=Partial, gby=[], aggr=[avg(customer.c_acctbal)] - ProjectionExec: expr=[c_acctbal@1 as c_acctbal] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: c_acctbal@1 > Some(0),11,2 AND Use substr(c_phone@0, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_0.parquet:0..3472684, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_1.parquet:0..3399474], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_1.parquet:3399474..3842490, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_2.parquet:0..3498085, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_3.parquet:0..2931057], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_3.parquet:2931057..3110748, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..6692467], 
[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6692467..6784093, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..6780531]]}, projection=[c_phone, c_acctbal], predicate=c_acctbal@5 > Some(0),11,2 AND Use substr(c_phone@4, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) AND c_acctbal@5 > Some(0),11,2, pruning_predicate=CASE WHEN c_acctbal_null_count@1 = c_acctbal_row_count@2 THEN false ELSE c_acctbal_max@0 > Some(0),11,2 END AND CASE WHEN c_acctbal_null_count@1 = c_acctbal_row_count@2 THEN false ELSE c_acctbal_max@0 > Some(0),11,2 END, required_guarantees=[] - -Query Stage #1 (4 -> 4): -ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: Use substr(c_phone@1, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_0.parquet:0..3472684, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_1.parquet:0..3399474], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_1.parquet:3399474..3842490, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_2.parquet:0..3498085, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_3.parquet:0..2931057], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_3.parquet:2931057..3110748, 
home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..6692467], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6692467..6784093, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..6780531]]}, projection=[c_custkey, c_phone, c_acctbal], predicate=Use substr(c_phone@4, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) - -Query Stage #2 (4 -> 4): -ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "o_custkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_0.parquet:0..13941205, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:0..13831262], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:13831262..13895674, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_2.parquet:0..14344446, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:0..13363609], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:13363609..14369581, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..26766495], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:26766495..27275418, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..27263542]]}, projection=[o_custkey] - -Query Stage #3 (4 -> 4): -ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "cntrycode", index: 0 }], 4)) - AggregateExec: mode=Partial, gby=[cntrycode@0 as cntrycode], aggr=[count(*), sum(custsale.c_acctbal)] - 
ProjectionExec: expr=[substr(c_phone@1, 1, 2) as cntrycode, c_acctbal@2 as c_acctbal] - NestedLoopJoinExec: join_type=Inner, filter=CAST(c_acctbal@0 AS Decimal128(15, 6)) > avg(customer.c_acctbal)@1 - AggregateExec: mode=Final, gby=[], aggr=[avg(customer.c_acctbal)] - CoalescePartitionsExec - ShuffleReaderExec(stage_id=0, input_partitioning=UnknownPartitioning(4)) - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=LeftAnti, on=[(c_custkey@0, o_custkey@0)], projection=[c_phone@1, c_acctbal@2] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "o_custkey", index: 0 }], 4)) - -Query Stage #4 (4 -> 4): -ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "cntrycode", index: 0 }], 4)) - SortExec: expr=[cntrycode@0 ASC NULLS LAST], preserve_partitioning=[true] - ProjectionExec: expr=[cntrycode@0 as cntrycode, count(*)@1 as numcust, sum(custsale.c_acctbal)@2 as totacctbal] - AggregateExec: mode=FinalPartitioned, gby=[cntrycode@0 as cntrycode], aggr=[count(*), sum(custsale.c_acctbal)] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "cntrycode", index: 0 }], 4)) - -Query Stage #5 (4 -> 1): -SortPreservingMergeExec: [cntrycode@0 ASC NULLS LAST] - ShuffleReaderExec(stage_id=4, input_partitioning=Hash([Column { name: "cntrycode", index: 0 }], 4)) - diff --git a/testdata/expected-plans/q3.txt b/testdata/expected-plans/q3.txt deleted file mode 100644 index 5fbfd5b..0000000 --- a/testdata/expected-plans/q3.txt +++ /dev/null @@ -1,110 +0,0 @@ -DataFusion Logical Plan -======================= - -Limit: skip=0, fetch=10 - Sort: revenue DESC NULLS FIRST, orders.o_orderdate ASC NULLS LAST, fetch=10 - Projection: lineitem.l_orderkey, 
sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS revenue, orders.o_orderdate, orders.o_shippriority - Aggregate: groupBy=[[lineitem.l_orderkey, orders.o_orderdate, orders.o_shippriority]], aggr=[[sum(lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - lineitem.l_discount)) AS sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]] - Projection: orders.o_orderdate, orders.o_shippriority, lineitem.l_orderkey, lineitem.l_extendedprice, lineitem.l_discount - Inner Join: orders.o_orderkey = lineitem.l_orderkey - Projection: orders.o_orderkey, orders.o_orderdate, orders.o_shippriority - Inner Join: customer.c_custkey = orders.o_custkey - Projection: customer.c_custkey - Filter: customer.c_mktsegment = Utf8("BUILDING") - TableScan: customer projection=[c_custkey, c_mktsegment], partial_filters=[customer.c_mktsegment = Utf8("BUILDING")] - Filter: orders.o_orderdate < Date32("1995-03-15") - TableScan: orders projection=[o_orderkey, o_custkey, o_orderdate, o_shippriority], partial_filters=[orders.o_orderdate < Date32("1995-03-15")] - Projection: lineitem.l_orderkey, lineitem.l_extendedprice, lineitem.l_discount - Filter: lineitem.l_shipdate > Date32("1995-03-15") - TableScan: lineitem projection=[l_orderkey, l_extendedprice, l_discount, l_shipdate], partial_filters=[lineitem.l_shipdate > Date32("1995-03-15")] - -DataFusion Physical Plan -======================== - -GlobalLimitExec: skip=0, fetch=10 - SortPreservingMergeExec: [revenue@1 DESC,o_orderdate@2 ASC NULLS LAST], fetch=10 - SortExec: TopK(fetch=10), expr=[revenue@1 DESC,o_orderdate@2 ASC NULLS LAST], preserve_partitioning=[true] - ProjectionExec: expr=[l_orderkey@0 as l_orderkey, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@3 as revenue, o_orderdate@1 as o_orderdate, o_shippriority@2 as o_shippriority] - AggregateExec: mode=FinalPartitioned, gby=[l_orderkey@0 as l_orderkey, o_orderdate@1 as o_orderdate, o_shippriority@2 as o_shippriority], 
aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([l_orderkey@0, o_orderdate@1, o_shippriority@2], 4), input_partitions=4 - AggregateExec: mode=Partial, gby=[l_orderkey@2 as l_orderkey, o_orderdate@0 as o_orderdate, o_shippriority@1 as o_shippriority], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_orderdate@1, o_shippriority@2, l_orderkey@3, l_extendedprice@4, l_discount@5] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([o_orderkey@0], 4), input_partitions=4 - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[o_orderkey@1, o_orderdate@3, o_shippriority@4] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([c_custkey@0], 4), input_partitions=4 - ProjectionExec: expr=[c_custkey@0 as c_custkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: c_mktsegment@1 = BUILDING - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_0.parquet:0..3472684, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_1.parquet:0..3399474], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_1.parquet:3399474..3842490, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_2.parquet:0..3498085, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_3.parquet:0..2931057], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_3.parquet:2931057..3110748, 
home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..6692467], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6692467..6784093, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..6780531]]}, projection=[c_custkey, c_mktsegment], predicate=c_mktsegment@6 = BUILDING, pruning_predicate=CASE WHEN c_mktsegment_null_count@2 = c_mktsegment_row_count@3 THEN false ELSE c_mktsegment_min@0 <= BUILDING AND BUILDING <= c_mktsegment_max@1 END, required_guarantees=[c_mktsegment in (BUILDING)] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([o_custkey@1], 4), input_partitions=4 - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: o_orderdate@2 < 1995-03-15 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_0.parquet:0..13941205, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:0..13831262], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:13831262..13895674, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_2.parquet:0..14344446, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:0..13363609], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:13363609..14369581, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..26766495], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:26766495..27275418, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..27263542]]}, projection=[o_orderkey, o_custkey, o_orderdate, o_shippriority], predicate=o_orderdate@4 < 1995-03-15, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE 
o_orderdate_min@0 < 1995-03-15 END, required_guarantees=[] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4 - ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_shipdate@3 > 1995-03-15 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_orderkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 > 1995-03-15, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 > 1995-03-15 END, required_guarantees=[] - -RaySQL Plan -=========== - -Query Stage #0 (4 -> 4): -ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) - ProjectionExec: expr=[c_custkey@0 as c_custkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: c_mktsegment@1 = BUILDING - ParquetExec: file_groups={4 groups: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_0.parquet:0..3472684, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_1.parquet:0..3399474], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_1.parquet:3399474..3842490, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_2.parquet:0..3498085, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_3.parquet:0..2931057], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_3.parquet:2931057..3110748, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..6692467], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6692467..6784093, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..6780531]]}, projection=[c_custkey, c_mktsegment], predicate=c_mktsegment@6 = BUILDING, pruning_predicate=CASE WHEN c_mktsegment_null_count@2 = c_mktsegment_row_count@3 THEN false ELSE c_mktsegment_min@0 <= BUILDING AND BUILDING <= c_mktsegment_max@1 END, required_guarantees=[c_mktsegment in (BUILDING)] - -Query Stage #1 (4 -> 4): -ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "o_custkey", index: 1 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: o_orderdate@2 < 1995-03-15 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_0.parquet:0..13941205, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:0..13831262], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:13831262..13895674, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_2.parquet:0..14344446, 
home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:0..13363609], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:13363609..14369581, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..26766495], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:26766495..27275418, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..27263542]]}, projection=[o_orderkey, o_custkey, o_orderdate, o_shippriority], predicate=o_orderdate@4 < 1995-03-15, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@0 < 1995-03-15 END, required_guarantees=[] - -Query Stage #2 (4 -> 4): -ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[o_orderkey@1, o_orderdate@3, o_shippriority@4] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "o_custkey", index: 1 }], 4)) - -Query Stage #3 (4 -> 4): -ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) - ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_shipdate@3 > 1995-03-15 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], 
[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_orderkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 > 1995-03-15, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 > 1995-03-15 END, required_guarantees=[] - -Query Stage #4 (4 -> 4): -ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }, Column { name: "o_orderdate", index: 1 }, Column { name: "o_shippriority", index: 2 }], 4)) - AggregateExec: mode=Partial, gby=[l_orderkey@2 as l_orderkey, o_orderdate@0 as o_orderdate, o_shippriority@1 as o_shippriority], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_orderdate@1, o_shippriority@2, l_orderkey@3, l_extendedprice@4, l_discount@5] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) - -Query Stage #5 (4 -> 4): 
-ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }, Column { name: "o_orderdate", index: 2 }, Column { name: "o_shippriority", index: 3 }], 4)) - SortExec: TopK(fetch=10), expr=[revenue@1 DESC,o_orderdate@2 ASC NULLS LAST], preserve_partitioning=[true] - ProjectionExec: expr=[l_orderkey@0 as l_orderkey, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@3 as revenue, o_orderdate@1 as o_orderdate, o_shippriority@2 as o_shippriority] - AggregateExec: mode=FinalPartitioned, gby=[l_orderkey@0 as l_orderkey, o_orderdate@1 as o_orderdate, o_shippriority@2 as o_shippriority], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=4, input_partitioning=Hash([Column { name: "l_orderkey", index: 0 }, Column { name: "o_orderdate", index: 1 }, Column { name: "o_shippriority", index: 2 }], 4)) - -Query Stage #6 (1 -> 1): -GlobalLimitExec: skip=0, fetch=10 - SortPreservingMergeExec: [revenue@1 DESC,o_orderdate@2 ASC NULLS LAST], fetch=10 - ShuffleReaderExec(stage_id=5, input_partitioning=Hash([Column { name: "l_orderkey", index: 0 }, Column { name: "o_orderdate", index: 2 }, Column { name: "o_shippriority", index: 3 }], 4)) - diff --git a/testdata/expected-plans/q4.txt b/testdata/expected-plans/q4.txt deleted file mode 100644 index a8e319d..0000000 --- a/testdata/expected-plans/q4.txt +++ /dev/null @@ -1,80 +0,0 @@ -DataFusion Logical Plan -======================= - -Sort: orders.o_orderpriority ASC NULLS LAST - Projection: orders.o_orderpriority, count(*) AS order_count - Aggregate: groupBy=[[orders.o_orderpriority]], aggr=[[count(Int64(1)) AS count(*)]] - Projection: orders.o_orderpriority - LeftSemi Join: orders.o_orderkey = __correlated_sq_1.l_orderkey - Projection: orders.o_orderkey, orders.o_orderpriority - Filter: orders.o_orderdate >= Date32("1995-04-01") AND orders.o_orderdate < Date32("1995-07-01") - TableScan: orders 
projection=[o_orderkey, o_orderdate, o_orderpriority], partial_filters=[orders.o_orderdate >= Date32("1995-04-01"), orders.o_orderdate < Date32("1995-07-01")] - SubqueryAlias: __correlated_sq_1 - Projection: lineitem.l_orderkey - Filter: lineitem.l_receiptdate > lineitem.l_commitdate - TableScan: lineitem projection=[l_orderkey, l_commitdate, l_receiptdate], partial_filters=[lineitem.l_receiptdate > lineitem.l_commitdate] - -DataFusion Physical Plan -======================== - -SortPreservingMergeExec: [o_orderpriority@0 ASC NULLS LAST] - SortExec: expr=[o_orderpriority@0 ASC NULLS LAST], preserve_partitioning=[true] - ProjectionExec: expr=[o_orderpriority@0 as o_orderpriority, count(*)@1 as order_count] - AggregateExec: mode=FinalPartitioned, gby=[o_orderpriority@0 as o_orderpriority], aggr=[count(*)] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([o_orderpriority@0], 4), input_partitions=4 - AggregateExec: mode=Partial, gby=[o_orderpriority@0 as o_orderpriority], aggr=[count(*)] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=LeftSemi, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_orderpriority@1] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([o_orderkey@0], 4), input_partitions=4 - ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_orderpriority@2 as o_orderpriority] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: o_orderdate@1 >= 1995-04-01 AND o_orderdate@1 < 1995-07-01 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_0.parquet:0..13941205, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:0..13831262], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:13831262..13895674, 
home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_2.parquet:0..14344446, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:0..13363609], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:13363609..14369581, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..26766495], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:26766495..27275418, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..27263542]]}, projection=[o_orderkey, o_orderdate, o_orderpriority], predicate=o_orderdate@4 >= 1995-04-01 AND o_orderdate@4 < 1995-07-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1995-04-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1995-07-01 END, required_guarantees=[] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4 - ProjectionExec: expr=[l_orderkey@0 as l_orderkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_receiptdate@2 > l_commitdate@1 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, 
home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_orderkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 - -RaySQL Plan -=========== - -Query Stage #0 (4 -> 4): -ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) - ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_orderpriority@2 as o_orderpriority] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: o_orderdate@1 >= 1995-04-01 AND o_orderdate@1 < 1995-07-01 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_0.parquet:0..13941205, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:0..13831262], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:13831262..13895674, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_2.parquet:0..14344446, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:0..13363609], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:13363609..14369581, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..26766495], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:26766495..27275418, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..27263542]]}, projection=[o_orderkey, o_orderdate, o_orderpriority], predicate=o_orderdate@4 >= 1995-04-01 AND o_orderdate@4 < 1995-07-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE 
o_orderdate_max@0 >= 1995-04-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1995-07-01 END, required_guarantees=[] - -Query Stage #1 (4 -> 4): -ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) - ProjectionExec: expr=[l_orderkey@0 as l_orderkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_receiptdate@2 > l_commitdate@1 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_orderkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 - -Query Stage #2 (4 -> 4): -ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "o_orderpriority", index: 0 }], 4)) - AggregateExec: mode=Partial, gby=[o_orderpriority@0 as o_orderpriority], aggr=[count(*)] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=LeftSemi, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_orderpriority@1] - CoalesceBatchesExec: 
target_batch_size=8192 - ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) - -Query Stage #3 (4 -> 4): -ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "o_orderpriority", index: 0 }], 4)) - SortExec: expr=[o_orderpriority@0 ASC NULLS LAST], preserve_partitioning=[true] - ProjectionExec: expr=[o_orderpriority@0 as o_orderpriority, count(*)@1 as order_count] - AggregateExec: mode=FinalPartitioned, gby=[o_orderpriority@0 as o_orderpriority], aggr=[count(*)] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "o_orderpriority", index: 0 }], 4)) - -Query Stage #4 (4 -> 1): -SortPreservingMergeExec: [o_orderpriority@0 ASC NULLS LAST] - ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "o_orderpriority", index: 0 }], 4)) - diff --git a/testdata/expected-plans/q5.txt b/testdata/expected-plans/q5.txt deleted file mode 100644 index f6270c5..0000000 --- a/testdata/expected-plans/q5.txt +++ /dev/null @@ -1,176 +0,0 @@ -DataFusion Logical Plan -======================= - -Sort: revenue DESC NULLS FIRST - Projection: nation.n_name, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS revenue - Aggregate: groupBy=[[nation.n_name]], aggr=[[sum(lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - lineitem.l_discount)) AS sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]] - Projection: lineitem.l_extendedprice, lineitem.l_discount, nation.n_name - Inner Join: nation.n_regionkey = region.r_regionkey - Projection: lineitem.l_extendedprice, lineitem.l_discount, nation.n_name, nation.n_regionkey - Inner Join: supplier.s_nationkey = nation.n_nationkey - Projection: lineitem.l_extendedprice, lineitem.l_discount, supplier.s_nationkey - Inner Join: 
lineitem.l_suppkey = supplier.s_suppkey, customer.c_nationkey = supplier.s_nationkey - Projection: customer.c_nationkey, lineitem.l_suppkey, lineitem.l_extendedprice, lineitem.l_discount - Inner Join: orders.o_orderkey = lineitem.l_orderkey - Projection: customer.c_nationkey, orders.o_orderkey - Inner Join: customer.c_custkey = orders.o_custkey - TableScan: customer projection=[c_custkey, c_nationkey] - Projection: orders.o_orderkey, orders.o_custkey - Filter: orders.o_orderdate >= Date32("1994-01-01") AND orders.o_orderdate < Date32("1995-01-01") - TableScan: orders projection=[o_orderkey, o_custkey, o_orderdate], partial_filters=[orders.o_orderdate >= Date32("1994-01-01"), orders.o_orderdate < Date32("1995-01-01")] - TableScan: lineitem projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount] - TableScan: supplier projection=[s_suppkey, s_nationkey] - TableScan: nation projection=[n_nationkey, n_name, n_regionkey] - Projection: region.r_regionkey - Filter: region.r_name = Utf8("AFRICA") - TableScan: region projection=[r_regionkey, r_name], partial_filters=[region.r_name = Utf8("AFRICA")] - -DataFusion Physical Plan -======================== - -SortPreservingMergeExec: [revenue@1 DESC] - SortExec: expr=[revenue@1 DESC], preserve_partitioning=[true] - ProjectionExec: expr=[n_name@0 as n_name, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@1 as revenue] - AggregateExec: mode=FinalPartitioned, gby=[n_name@0 as n_name], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([n_name@0], 4), input_partitions=4 - AggregateExec: mode=Partial, gby=[n_name@2 as n_name], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(r_regionkey@0, n_regionkey@3)], projection=[l_extendedprice@1, l_discount@2, n_name@3] - CoalesceBatchesExec: 
target_batch_size=8192 - RepartitionExec: partitioning=Hash([r_regionkey@0], 4), input_partitions=4 - RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 - ProjectionExec: expr=[r_regionkey@0 as r_regionkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: r_name@1 = AFRICA - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet/8NlZWnksIQjnYDPs_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet/part1.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = AFRICA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= AFRICA AND AFRICA <= r_name_max@1 END, required_guarantees=[r_name in (AFRICA)] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([n_regionkey@3], 4), input_partitions=4 - ProjectionExec: expr=[l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, n_name@0 as n_name, n_regionkey@1 as n_regionkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[n_name@1, n_regionkey@2, l_extendedprice@3, l_discount@4] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=2 - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/IftthSxZMytggJzA_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name, n_regionkey] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([s_nationkey@2], 4), input_partitions=4 - ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@0 as s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1), 
(s_nationkey@1, c_nationkey@0)], projection=[s_nationkey@1, l_extendedprice@4, l_discount@5] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([s_suppkey@0, s_nationkey@1], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([l_suppkey@1, c_nationkey@0], 4), input_partitions=4 - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@1, l_orderkey@0)], projection=[c_nationkey@0, l_suppkey@3, l_extendedprice@4, l_discount@5] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([o_orderkey@1], 4), input_partitions=4 - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_nationkey@1, o_orderkey@2] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([c_custkey@0], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_0.parquet:0..3472684, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_1.parquet:0..3399474], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_1.parquet:3399474..3842490, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_2.parquet:0..3498085, 
home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_3.parquet:0..2931057], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_3.parquet:2931057..3110748, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..6692467], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6692467..6784093, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..6780531]]}, projection=[c_custkey, c_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([o_custkey@1], 4), input_partitions=4 - ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: o_orderdate@2 >= 1994-01-01 AND o_orderdate@2 < 1995-01-01 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_0.parquet:0..13941205, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:0..13831262], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:13831262..13895674, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_2.parquet:0..14344446, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:0..13363609], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:13363609..14369581, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..26766495], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:26766495..27275418, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..27263542]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1994-01-01 AND o_orderdate@4 < 1995-01-01, 
pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1994-01-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1995-01-01 END, required_guarantees=[] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount] - -RaySQL Plan -=========== - -Query Stage #0 (2 -> 4): -ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "r_regionkey", index: 0 }], 4)) - ProjectionExec: expr=[r_regionkey@0 as r_regionkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: r_name@1 = AFRICA - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet/8NlZWnksIQjnYDPs_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet/part1.parquet]]}, 
projection=[r_regionkey, r_name], predicate=r_name@1 = AFRICA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= AFRICA AND AFRICA <= r_name_max@1 END, required_guarantees=[r_name in (AFRICA)] - -Query Stage #1 (2 -> 4): -ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/IftthSxZMytggJzA_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name, n_regionkey] - -Query Stage #2 (4 -> 4): -ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }, Column { name: "s_nationkey", index: 1 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] - -Query Stage #3 (4 -> 4): -ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_0.parquet:0..3472684, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_1.parquet:0..3399474], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_1.parquet:3399474..3842490, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_2.parquet:0..3498085, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_3.parquet:0..2931057], 
[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_3.parquet:2931057..3110748, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..6692467], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6692467..6784093, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..6780531]]}, projection=[c_custkey, c_nationkey] - -Query Stage #4 (4 -> 4): -ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "o_custkey", index: 1 }], 4)) - ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: o_orderdate@2 >= 1994-01-01 AND o_orderdate@2 < 1995-01-01 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_0.parquet:0..13941205, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:0..13831262], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:13831262..13895674, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_2.parquet:0..14344446, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:0..13363609], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:13363609..14369581, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..26766495], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:26766495..27275418, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..27263542]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1994-01-01 AND o_orderdate@4 < 1995-01-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 
>= 1994-01-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1995-01-01 END, required_guarantees=[] - -Query Stage #5 (4 -> 4): -ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "o_orderkey", index: 1 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_nationkey@1, o_orderkey@2] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=4, input_partitioning=Hash([Column { name: "o_custkey", index: 1 }], 4)) - -Query Stage #6 (4 -> 4): -ShuffleWriterExec(stage_id=6, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount] - -Query Stage #7 (4 -> 4): 
-ShuffleWriterExec(stage_id=7, output_partitioning=Hash([Column { name: "l_suppkey", index: 1 }, Column { name: "c_nationkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@1, l_orderkey@0)], projection=[c_nationkey@0, l_suppkey@3, l_extendedprice@4, l_discount@5] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=5, input_partitioning=Hash([Column { name: "o_orderkey", index: 1 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=6, input_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) - -Query Stage #8 (4 -> 4): -ShuffleWriterExec(stage_id=8, output_partitioning=Hash([Column { name: "s_nationkey", index: 2 }], 4)) - ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@0 as s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1), (s_nationkey@1, c_nationkey@0)], projection=[s_nationkey@1, l_extendedprice@4, l_discount@5] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "s_suppkey", index: 0 }, Column { name: "s_nationkey", index: 1 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=7, input_partitioning=Hash([Column { name: "l_suppkey", index: 1 }, Column { name: "c_nationkey", index: 0 }], 4)) - -Query Stage #9 (4 -> 4): -ShuffleWriterExec(stage_id=9, output_partitioning=Hash([Column { name: "n_regionkey", index: 3 }], 4)) - ProjectionExec: expr=[l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, n_name@0 as n_name, n_regionkey@1 as n_regionkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[n_name@1, n_regionkey@2, l_extendedprice@3, l_discount@4] - CoalesceBatchesExec: 
target_batch_size=8192 - ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=8, input_partitioning=Hash([Column { name: "s_nationkey", index: 2 }], 4)) - -Query Stage #10 (4 -> 4): -ShuffleWriterExec(stage_id=10, output_partitioning=Hash([Column { name: "n_name", index: 0 }], 4)) - AggregateExec: mode=Partial, gby=[n_name@2 as n_name], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(r_regionkey@0, n_regionkey@3)], projection=[l_extendedprice@1, l_discount@2, n_name@3] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "r_regionkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=9, input_partitioning=Hash([Column { name: "n_regionkey", index: 3 }], 4)) - -Query Stage #11 (4 -> 4): -ShuffleWriterExec(stage_id=11, output_partitioning=Hash([Column { name: "n_name", index: 0 }], 4)) - SortExec: expr=[revenue@1 DESC], preserve_partitioning=[true] - ProjectionExec: expr=[n_name@0 as n_name, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@1 as revenue] - AggregateExec: mode=FinalPartitioned, gby=[n_name@0 as n_name], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=10, input_partitioning=Hash([Column { name: "n_name", index: 0 }], 4)) - -Query Stage #12 (4 -> 1): -SortPreservingMergeExec: [revenue@1 DESC] - ShuffleReaderExec(stage_id=11, input_partitioning=Hash([Column { name: "n_name", index: 0 }], 4)) - diff --git a/testdata/expected-plans/q6.txt b/testdata/expected-plans/q6.txt deleted file mode 100644 index 9d62863..0000000 --- a/testdata/expected-plans/q6.txt +++ /dev/null @@ -1,38 +0,0 @@ -DataFusion 
Logical Plan -======================= - -Projection: sum(lineitem.l_extendedprice * lineitem.l_discount) AS revenue - Aggregate: groupBy=[[]], aggr=[[sum(lineitem.l_extendedprice * lineitem.l_discount)]] - Projection: lineitem.l_extendedprice, lineitem.l_discount - Filter: lineitem.l_shipdate >= Date32("1994-01-01") AND lineitem.l_shipdate < Date32("1995-01-01") AND lineitem.l_discount >= Decimal128(Some(3),11,2) AND lineitem.l_discount <= Decimal128(Some(5),11,2) AND lineitem.l_quantity < Decimal128(Some(2400),11,2) - TableScan: lineitem projection=[l_quantity, l_extendedprice, l_discount, l_shipdate], partial_filters=[lineitem.l_shipdate >= Date32("1994-01-01"), lineitem.l_shipdate < Date32("1995-01-01"), lineitem.l_discount >= Decimal128(Some(3),11,2), lineitem.l_discount <= Decimal128(Some(5),11,2), lineitem.l_quantity < Decimal128(Some(2400),11,2)] - -DataFusion Physical Plan -======================== - -ProjectionExec: expr=[sum(lineitem.l_extendedprice * lineitem.l_discount)@0 as revenue] - AggregateExec: mode=Final, gby=[], aggr=[sum(lineitem.l_extendedprice * lineitem.l_discount)] - CoalescePartitionsExec - AggregateExec: mode=Partial, gby=[], aggr=[sum(lineitem.l_extendedprice * lineitem.l_discount)] - ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_shipdate@3 >= 1994-01-01 AND l_shipdate@3 < 1995-01-01 AND l_discount@2 >= Some(3),11,2 AND l_discount@2 <= Some(5),11,2 AND l_quantity@0 < Some(2400),11,2 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, 
home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_quantity, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1994-01-01 AND l_shipdate@10 < 1995-01-01 AND l_discount@6 >= Some(3),11,2 AND l_discount@6 <= Some(5),11,2 AND l_quantity@4 < Some(2400),11,2, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1994-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1995-01-01 END AND CASE WHEN l_discount_null_count@5 = l_discount_row_count@6 THEN false ELSE l_discount_max@4 >= Some(3),11,2 END AND CASE WHEN l_discount_null_count@5 = l_discount_row_count@6 THEN false ELSE l_discount_min@7 <= Some(5),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@8 < Some(2400),11,2 END, required_guarantees=[] - -RaySQL Plan -=========== - -Query Stage #0 (4 -> 1): -ShuffleWriterExec(stage_id=0, output_partitioning=UnknownPartitioning(4)) - AggregateExec: mode=Partial, gby=[], aggr=[sum(lineitem.l_extendedprice * lineitem.l_discount)] - ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_shipdate@3 >= 1994-01-01 AND l_shipdate@3 < 1995-01-01 AND l_discount@2 >= Some(3),11,2 AND l_discount@2 <= Some(5),11,2 AND 
l_quantity@0 < Some(2400),11,2 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_quantity, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1994-01-01 AND l_shipdate@10 < 1995-01-01 AND l_discount@6 >= Some(3),11,2 AND l_discount@6 <= Some(5),11,2 AND l_quantity@4 < Some(2400),11,2, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1994-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1995-01-01 END AND CASE WHEN l_discount_null_count@5 = l_discount_row_count@6 THEN false ELSE l_discount_max@4 >= Some(3),11,2 END AND CASE WHEN l_discount_null_count@5 = l_discount_row_count@6 THEN false ELSE l_discount_min@7 <= Some(5),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@8 < Some(2400),11,2 END, required_guarantees=[] - -Query Stage #1 (1 -> 1): -ProjectionExec: expr=[sum(lineitem.l_extendedprice * 
lineitem.l_discount)@0 as revenue] - AggregateExec: mode=Final, gby=[], aggr=[sum(lineitem.l_extendedprice * lineitem.l_discount)] - CoalescePartitionsExec - ShuffleReaderExec(stage_id=0, input_partitioning=UnknownPartitioning(4)) - diff --git a/testdata/expected-plans/q7.txt b/testdata/expected-plans/q7.txt deleted file mode 100644 index ed4009c..0000000 --- a/testdata/expected-plans/q7.txt +++ /dev/null @@ -1,183 +0,0 @@ -DataFusion Logical Plan -======================= - -Sort: shipping.supp_nation ASC NULLS LAST, shipping.cust_nation ASC NULLS LAST, shipping.l_year ASC NULLS LAST - Projection: shipping.supp_nation, shipping.cust_nation, shipping.l_year, sum(shipping.volume) AS revenue - Aggregate: groupBy=[[shipping.supp_nation, shipping.cust_nation, shipping.l_year]], aggr=[[sum(shipping.volume)]] - SubqueryAlias: shipping - Projection: n1.n_name AS supp_nation, n2.n_name AS cust_nation, date_part(Utf8("YEAR"), lineitem.l_shipdate) AS l_year, lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - lineitem.l_discount) AS volume - Inner Join: customer.c_nationkey = n2.n_nationkey Filter: n1.n_name = Utf8("GERMANY") AND n2.n_name = Utf8("IRAQ") OR n1.n_name = Utf8("IRAQ") AND n2.n_name = Utf8("GERMANY") - Projection: lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_shipdate, customer.c_nationkey, n1.n_name - Inner Join: supplier.s_nationkey = n1.n_nationkey - Projection: supplier.s_nationkey, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_shipdate, customer.c_nationkey - Inner Join: orders.o_custkey = customer.c_custkey - Projection: supplier.s_nationkey, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_shipdate, orders.o_custkey - Inner Join: lineitem.l_orderkey = orders.o_orderkey - Projection: supplier.s_nationkey, lineitem.l_orderkey, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_shipdate - Inner Join: supplier.s_suppkey = lineitem.l_suppkey - TableScan: supplier projection=[s_suppkey, s_nationkey] - Filter: 
lineitem.l_shipdate >= Date32("1995-01-01") AND lineitem.l_shipdate <= Date32("1996-12-31") - TableScan: lineitem projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount, l_shipdate], partial_filters=[lineitem.l_shipdate >= Date32("1995-01-01"), lineitem.l_shipdate <= Date32("1996-12-31")] - TableScan: orders projection=[o_orderkey, o_custkey] - TableScan: customer projection=[c_custkey, c_nationkey] - SubqueryAlias: n1 - Filter: nation.n_name = Utf8("GERMANY") OR nation.n_name = Utf8("IRAQ") - TableScan: nation projection=[n_nationkey, n_name], partial_filters=[nation.n_name = Utf8("GERMANY") OR nation.n_name = Utf8("IRAQ")] - SubqueryAlias: n2 - Filter: nation.n_name = Utf8("IRAQ") OR nation.n_name = Utf8("GERMANY") - TableScan: nation projection=[n_nationkey, n_name], partial_filters=[nation.n_name = Utf8("IRAQ") OR nation.n_name = Utf8("GERMANY")] - -DataFusion Physical Plan -======================== - -SortPreservingMergeExec: [supp_nation@0 ASC NULLS LAST,cust_nation@1 ASC NULLS LAST,l_year@2 ASC NULLS LAST] - SortExec: expr=[supp_nation@0 ASC NULLS LAST,cust_nation@1 ASC NULLS LAST,l_year@2 ASC NULLS LAST], preserve_partitioning=[true] - ProjectionExec: expr=[supp_nation@0 as supp_nation, cust_nation@1 as cust_nation, l_year@2 as l_year, sum(shipping.volume)@3 as revenue] - AggregateExec: mode=FinalPartitioned, gby=[supp_nation@0 as supp_nation, cust_nation@1 as cust_nation, l_year@2 as l_year], aggr=[sum(shipping.volume)] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([supp_nation@0, cust_nation@1, l_year@2], 4), input_partitions=4 - AggregateExec: mode=Partial, gby=[supp_nation@0 as supp_nation, cust_nation@1 as cust_nation, l_year@2 as l_year], aggr=[sum(shipping.volume)] - ProjectionExec: expr=[n_name@4 as supp_nation, n_name@0 as cust_nation, date_part(YEAR, l_shipdate@3) as l_year, l_extendedprice@1 * (Some(1),20,0 - l_discount@2) as volume] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: 
mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, c_nationkey@3)], filter=n_name@0 = GERMANY AND n_name@1 = IRAQ OR n_name@0 = IRAQ AND n_name@1 = GERMANY, projection=[n_name@1, l_extendedprice@2, l_discount@3, l_shipdate@4, n_name@6] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=4 - RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: n_name@1 = IRAQ OR n_name@1 = GERMANY - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/IftthSxZMytggJzA_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = IRAQ OR n_name@1 = GERMANY, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= IRAQ AND IRAQ <= n_name_max@1 END OR CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= GERMANY AND GERMANY <= n_name_max@1 END, required_guarantees=[n_name in (GERMANY, IRAQ)] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([c_nationkey@3], 4), input_partitions=4 - ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, l_shipdate@3 as l_shipdate, c_nationkey@4 as c_nationkey, n_name@0 as n_name] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@0)], projection=[n_name@1, l_extendedprice@3, l_discount@4, l_shipdate@5, c_nationkey@6] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=4 - RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: n_name@1 = GERMANY OR n_name@1 = IRAQ - ParquetExec: file_groups={2 groups: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/IftthSxZMytggJzA_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = GERMANY OR n_name@1 = IRAQ, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= GERMANY AND GERMANY <= n_name_max@1 END OR CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= IRAQ AND IRAQ <= n_name_max@1 END, required_guarantees=[n_name in (GERMANY, IRAQ)] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([s_nationkey@0], 4), input_partitions=4 - ProjectionExec: expr=[s_nationkey@1 as s_nationkey, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, l_shipdate@4 as l_shipdate, c_nationkey@0 as c_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@4)], projection=[c_nationkey@1, s_nationkey@2, l_extendedprice@3, l_discount@4, l_shipdate@5] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([c_custkey@0], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_0.parquet:0..3472684, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_1.parquet:0..3399474], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_1.parquet:3399474..3842490, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_2.parquet:0..3498085, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_3.parquet:0..2931057], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_3.parquet:2931057..3110748, 
home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..6692467], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6692467..6784093, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..6780531]]}, projection=[c_custkey, c_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([o_custkey@4], 4), input_partitions=4 - ProjectionExec: expr=[s_nationkey@1 as s_nationkey, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, l_shipdate@4 as l_shipdate, o_custkey@0 as o_custkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@0, l_orderkey@1)], projection=[o_custkey@1, s_nationkey@2, l_extendedprice@4, l_discount@5, l_shipdate@6] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([o_orderkey@0], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_0.parquet:0..13941205, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:0..13831262], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:13831262..13895674, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_2.parquet:0..14344446, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:0..13363609], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:13363609..14369581, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..26766495], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:26766495..27275418, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..27263542]]}, projection=[o_orderkey, o_custkey] - 
CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([l_orderkey@1], 4), input_partitions=4 - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1)], projection=[s_nationkey@1, l_orderkey@2, l_extendedprice@4, l_discount@5, l_shipdate@6] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([l_suppkey@1], 4), input_partitions=4 - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_shipdate@4 >= 1995-01-01 AND l_shipdate@4 <= 1996-12-31 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], 
[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1995-01-01 AND l_shipdate@10 <= 1996-12-31, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1995-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 <= 1996-12-31 END, required_guarantees=[] - -RaySQL Plan -=========== - -Query Stage #0 (2 -> 4): -ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: n_name@1 = IRAQ OR n_name@1 = GERMANY - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/IftthSxZMytggJzA_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = IRAQ OR n_name@1 = GERMANY, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= IRAQ AND IRAQ <= n_name_max@1 END OR CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= GERMANY AND GERMANY <= n_name_max@1 END, required_guarantees=[n_name in (GERMANY, IRAQ)] - -Query Stage #1 (2 -> 4): -ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: n_name@1 = GERMANY OR n_name@1 = IRAQ - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/IftthSxZMytggJzA_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = 
GERMANY OR n_name@1 = IRAQ, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= GERMANY AND GERMANY <= n_name_max@1 END OR CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= IRAQ AND IRAQ <= n_name_max@1 END, required_guarantees=[n_name in (GERMANY, IRAQ)] - -Query Stage #2 (4 -> 4): -ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_0.parquet:0..3472684, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_1.parquet:0..3399474], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_1.parquet:3399474..3842490, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_2.parquet:0..3498085, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_3.parquet:0..2931057], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_3.parquet:2931057..3110748, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..6692467], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6692467..6784093, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..6780531]]}, projection=[c_custkey, c_nationkey] - -Query Stage #3 (4 -> 4): -ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_0.parquet:0..13941205, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:0..13831262], 
[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:13831262..13895674, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_2.parquet:0..14344446, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:0..13363609], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:13363609..14369581, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..26766495], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:26766495..27275418, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..27263542]]}, projection=[o_orderkey, o_custkey] - -Query Stage #4 (4 -> 4): -ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] - -Query Stage #5 (4 -> 4): -ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "l_suppkey", index: 1 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_shipdate@4 >= 1995-01-01 AND l_shipdate@4 <= 1996-12-31 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, 
home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1995-01-01 AND l_shipdate@10 <= 1996-12-31, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1995-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 <= 1996-12-31 END, required_guarantees=[] - -Query Stage #6 (4 -> 4): -ShuffleWriterExec(stage_id=6, output_partitioning=Hash([Column { name: "l_orderkey", index: 1 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1)], projection=[s_nationkey@1, l_orderkey@2, l_extendedprice@4, l_discount@5, l_shipdate@6] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=4, input_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=5, input_partitioning=Hash([Column { name: "l_suppkey", index: 1 }], 4)) - -Query Stage #7 (4 -> 4): -ShuffleWriterExec(stage_id=7, output_partitioning=Hash([Column { name: "o_custkey", index: 4 }], 4)) - ProjectionExec: expr=[s_nationkey@1 as s_nationkey, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, l_shipdate@4 as l_shipdate, o_custkey@0 as 
o_custkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@0, l_orderkey@1)], projection=[o_custkey@1, s_nationkey@2, l_extendedprice@4, l_discount@5, l_shipdate@6] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=6, input_partitioning=Hash([Column { name: "l_orderkey", index: 1 }], 4)) - -Query Stage #8 (4 -> 4): -ShuffleWriterExec(stage_id=8, output_partitioning=Hash([Column { name: "s_nationkey", index: 0 }], 4)) - ProjectionExec: expr=[s_nationkey@1 as s_nationkey, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, l_shipdate@4 as l_shipdate, c_nationkey@0 as c_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@4)], projection=[c_nationkey@1, s_nationkey@2, l_extendedprice@3, l_discount@4, l_shipdate@5] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=7, input_partitioning=Hash([Column { name: "o_custkey", index: 4 }], 4)) - -Query Stage #9 (4 -> 4): -ShuffleWriterExec(stage_id=9, output_partitioning=Hash([Column { name: "c_nationkey", index: 3 }], 4)) - ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, l_shipdate@3 as l_shipdate, c_nationkey@4 as c_nationkey, n_name@0 as n_name] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@0)], projection=[n_name@1, l_extendedprice@3, l_discount@4, l_shipdate@5, c_nationkey@6] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: 
"n_nationkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=8, input_partitioning=Hash([Column { name: "s_nationkey", index: 0 }], 4)) - -Query Stage #10 (4 -> 4): -ShuffleWriterExec(stage_id=10, output_partitioning=Hash([Column { name: "supp_nation", index: 0 }, Column { name: "cust_nation", index: 1 }, Column { name: "l_year", index: 2 }], 4)) - AggregateExec: mode=Partial, gby=[supp_nation@0 as supp_nation, cust_nation@1 as cust_nation, l_year@2 as l_year], aggr=[sum(shipping.volume)] - ProjectionExec: expr=[n_name@4 as supp_nation, n_name@0 as cust_nation, date_part(YEAR, l_shipdate@3) as l_year, l_extendedprice@1 * (Some(1),20,0 - l_discount@2) as volume] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, c_nationkey@3)], filter=n_name@0 = GERMANY AND n_name@1 = IRAQ OR n_name@0 = IRAQ AND n_name@1 = GERMANY, projection=[n_name@1, l_extendedprice@2, l_discount@3, l_shipdate@4, n_name@6] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=9, input_partitioning=Hash([Column { name: "c_nationkey", index: 3 }], 4)) - -Query Stage #11 (4 -> 4): -ShuffleWriterExec(stage_id=11, output_partitioning=Hash([Column { name: "supp_nation", index: 0 }, Column { name: "cust_nation", index: 1 }, Column { name: "l_year", index: 2 }], 4)) - SortExec: expr=[supp_nation@0 ASC NULLS LAST,cust_nation@1 ASC NULLS LAST,l_year@2 ASC NULLS LAST], preserve_partitioning=[true] - ProjectionExec: expr=[supp_nation@0 as supp_nation, cust_nation@1 as cust_nation, l_year@2 as l_year, sum(shipping.volume)@3 as revenue] - AggregateExec: mode=FinalPartitioned, gby=[supp_nation@0 as supp_nation, cust_nation@1 as cust_nation, l_year@2 as l_year], aggr=[sum(shipping.volume)] - CoalesceBatchesExec: 
target_batch_size=8192 - ShuffleReaderExec(stage_id=10, input_partitioning=Hash([Column { name: "supp_nation", index: 0 }, Column { name: "cust_nation", index: 1 }, Column { name: "l_year", index: 2 }], 4)) - -Query Stage #12 (4 -> 1): -SortPreservingMergeExec: [supp_nation@0 ASC NULLS LAST,cust_nation@1 ASC NULLS LAST,l_year@2 ASC NULLS LAST] - ShuffleReaderExec(stage_id=11, input_partitioning=Hash([Column { name: "supp_nation", index: 0 }, Column { name: "cust_nation", index: 1 }, Column { name: "l_year", index: 2 }], 4)) - diff --git a/testdata/expected-plans/q8.txt b/testdata/expected-plans/q8.txt deleted file mode 100644 index 42479de..0000000 --- a/testdata/expected-plans/q8.txt +++ /dev/null @@ -1,238 +0,0 @@ -DataFusion Logical Plan -======================= - -Sort: all_nations.o_year ASC NULLS LAST - Projection: all_nations.o_year, sum(CASE WHEN all_nations.nation = Utf8("IRAQ") THEN all_nations.volume ELSE Int64(0) END) / sum(all_nations.volume) AS mkt_share - Aggregate: groupBy=[[all_nations.o_year]], aggr=[[sum(CASE WHEN all_nations.nation = Utf8("IRAQ") THEN all_nations.volume ELSE Decimal128(Some(0),35,4) END) AS sum(CASE WHEN all_nations.nation = Utf8("IRAQ") THEN all_nations.volume ELSE Int64(0) END), sum(all_nations.volume)]] - SubqueryAlias: all_nations - Projection: date_part(Utf8("YEAR"), orders.o_orderdate) AS o_year, lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - lineitem.l_discount) AS volume, n2.n_name AS nation - Inner Join: n1.n_regionkey = region.r_regionkey - Projection: lineitem.l_extendedprice, lineitem.l_discount, orders.o_orderdate, n1.n_regionkey, n2.n_name - Inner Join: supplier.s_nationkey = n2.n_nationkey - Projection: lineitem.l_extendedprice, lineitem.l_discount, supplier.s_nationkey, orders.o_orderdate, n1.n_regionkey - Inner Join: customer.c_nationkey = n1.n_nationkey - Projection: lineitem.l_extendedprice, lineitem.l_discount, supplier.s_nationkey, orders.o_orderdate, customer.c_nationkey - Inner Join: 
orders.o_custkey = customer.c_custkey - Projection: lineitem.l_extendedprice, lineitem.l_discount, supplier.s_nationkey, orders.o_custkey, orders.o_orderdate - Inner Join: lineitem.l_orderkey = orders.o_orderkey - Projection: lineitem.l_orderkey, lineitem.l_extendedprice, lineitem.l_discount, supplier.s_nationkey - Inner Join: lineitem.l_suppkey = supplier.s_suppkey - Projection: lineitem.l_orderkey, lineitem.l_suppkey, lineitem.l_extendedprice, lineitem.l_discount - Inner Join: part.p_partkey = lineitem.l_partkey - Projection: part.p_partkey - Filter: part.p_type = Utf8("LARGE PLATED STEEL") - TableScan: part projection=[p_partkey, p_type], partial_filters=[part.p_type = Utf8("LARGE PLATED STEEL")] - TableScan: lineitem projection=[l_orderkey, l_partkey, l_suppkey, l_extendedprice, l_discount] - TableScan: supplier projection=[s_suppkey, s_nationkey] - Filter: orders.o_orderdate >= Date32("1995-01-01") AND orders.o_orderdate <= Date32("1996-12-31") - TableScan: orders projection=[o_orderkey, o_custkey, o_orderdate], partial_filters=[orders.o_orderdate >= Date32("1995-01-01"), orders.o_orderdate <= Date32("1996-12-31")] - TableScan: customer projection=[c_custkey, c_nationkey] - SubqueryAlias: n1 - TableScan: nation projection=[n_nationkey, n_regionkey] - SubqueryAlias: n2 - TableScan: nation projection=[n_nationkey, n_name] - Projection: region.r_regionkey - Filter: region.r_name = Utf8("MIDDLE EAST") - TableScan: region projection=[r_regionkey, r_name], partial_filters=[region.r_name = Utf8("MIDDLE EAST")] - -DataFusion Physical Plan -======================== - -SortPreservingMergeExec: [o_year@0 ASC NULLS LAST] - SortExec: expr=[o_year@0 ASC NULLS LAST], preserve_partitioning=[true] - ProjectionExec: expr=[o_year@0 as o_year, sum(CASE WHEN all_nations.nation = Utf8("IRAQ") THEN all_nations.volume ELSE Int64(0) END)@1 / sum(all_nations.volume)@2 as mkt_share] - AggregateExec: mode=FinalPartitioned, gby=[o_year@0 as o_year], aggr=[sum(CASE WHEN all_nations.nation 
= Utf8("IRAQ") THEN all_nations.volume ELSE Int64(0) END), sum(all_nations.volume)] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([o_year@0], 4), input_partitions=4 - AggregateExec: mode=Partial, gby=[o_year@0 as o_year], aggr=[sum(CASE WHEN all_nations.nation = Utf8("IRAQ") THEN all_nations.volume ELSE Int64(0) END), sum(all_nations.volume)] - ProjectionExec: expr=[date_part(YEAR, o_orderdate@2) as o_year, l_extendedprice@0 * (Some(1),20,0 - l_discount@1) as volume, n_name@3 as nation] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(r_regionkey@0, n_regionkey@3)], projection=[l_extendedprice@1, l_discount@2, o_orderdate@3, n_name@5] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([r_regionkey@0], 4), input_partitions=4 - RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 - ProjectionExec: expr=[r_regionkey@0 as r_regionkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: r_name@1 = MIDDLE EAST - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet/8NlZWnksIQjnYDPs_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet/part1.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = MIDDLE EAST, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= MIDDLE EAST AND MIDDLE EAST <= r_name_max@1 END, required_guarantees=[r_name in (MIDDLE EAST)] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([n_regionkey@3], 4), input_partitions=4 - ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, o_orderdate@3 as o_orderdate, n_regionkey@4 as n_regionkey, n_name@0 as n_name] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[n_name@1, 
l_extendedprice@2, l_discount@3, o_orderdate@5, n_regionkey@6] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=2 - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/IftthSxZMytggJzA_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([s_nationkey@2], 4), input_partitions=4 - ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@3 as s_nationkey, o_orderdate@4 as o_orderdate, n_regionkey@0 as n_regionkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, c_nationkey@4)], projection=[n_regionkey@1, l_extendedprice@2, l_discount@3, s_nationkey@4, o_orderdate@5] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=2 - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/IftthSxZMytggJzA_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_regionkey] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([c_nationkey@4], 4), input_partitions=4 - ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@3 as s_nationkey, o_orderdate@4 as o_orderdate, c_nationkey@0 as c_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@3)], projection=[c_nationkey@1, l_extendedprice@2, l_discount@3, s_nationkey@4, o_orderdate@6] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([c_custkey@0], 4), input_partitions=4 - 
ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_0.parquet:0..3472684, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_1.parquet:0..3399474], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_1.parquet:3399474..3842490, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_2.parquet:0..3498085, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_3.parquet:0..2931057], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_3.parquet:2931057..3110748, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..6692467], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6692467..6784093, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..6780531]]}, projection=[c_custkey, c_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([o_custkey@3], 4), input_partitions=4 - ProjectionExec: expr=[l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@4 as s_nationkey, o_custkey@0 as o_custkey, o_orderdate@1 as o_orderdate] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_custkey@1, o_orderdate@2, l_extendedprice@4, l_discount@5, s_nationkey@6] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([o_orderkey@0], 4), input_partitions=4 - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: o_orderdate@2 >= 1995-01-01 AND o_orderdate@2 <= 1996-12-31 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_0.parquet:0..13941205, 
home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:0..13831262], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:13831262..13895674, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_2.parquet:0..14344446, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:0..13363609], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:13363609..14369581, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..26766495], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:26766495..27275418, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..27263542]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1995-01-01 AND o_orderdate@4 <= 1996-12-31, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1995-01-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 <= 1996-12-31 END, required_guarantees=[] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4 - ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@0 as s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1)], projection=[s_nationkey@1, l_orderkey@2, l_extendedprice@4, l_discount@5] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_0.parquet], 
[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([l_suppkey@1], 4), input_partitions=4 - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, l_partkey@1)], projection=[l_orderkey@1, l_suppkey@3, l_extendedprice@4, l_discount@5] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([p_partkey@0], 4), input_partitions=4 - ProjectionExec: expr=[p_partkey@0 as p_partkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: p_type@1 = LARGE PLATED STEEL - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet:0..3454243, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet:0..61547], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet:61547..3452413, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_0.parquet:0..124924], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_0.parquet:124924..2056321, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_1.parquet:0..1526046, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_2.parquet:0..58347], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_2.parquet:58347..1788444, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_3.parquet:0..1785690]]}, projection=[p_partkey, p_type], predicate=p_type@4 = LARGE PLATED STEEL, pruning_predicate=CASE WHEN p_type_null_count@2 = p_type_row_count@3 THEN false 
ELSE p_type_min@0 <= LARGE PLATED STEEL AND LARGE PLATED STEEL <= p_type_max@1 END, required_guarantees=[p_type in (LARGE PLATED STEEL)] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([l_partkey@1], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_orderkey, l_partkey, l_suppkey, l_extendedprice, l_discount] - -RaySQL Plan -=========== - -Query Stage #0 (2 -> 4): -ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "r_regionkey", index: 0 }], 4)) - ProjectionExec: expr=[r_regionkey@0 as r_regionkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: r_name@1 = MIDDLE EAST - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet/8NlZWnksIQjnYDPs_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet/part1.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = MIDDLE EAST, pruning_predicate=CASE WHEN r_name_null_count@2 = 
r_name_row_count@3 THEN false ELSE r_name_min@0 <= MIDDLE EAST AND MIDDLE EAST <= r_name_max@1 END, required_guarantees=[r_name in (MIDDLE EAST)] - -Query Stage #1 (2 -> 4): -ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/IftthSxZMytggJzA_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name] - -Query Stage #2 (2 -> 4): -ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/IftthSxZMytggJzA_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_regionkey] - -Query Stage #3 (4 -> 4): -ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_0.parquet:0..3472684, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_1.parquet:0..3399474], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_1.parquet:3399474..3842490, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_2.parquet:0..3498085, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_3.parquet:0..2931057], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/pWo0Flk4zs96ILRo_3.parquet:2931057..3110748, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..6692467], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6692467..6784093, 
home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..6780531]]}, projection=[c_custkey, c_nationkey] - -Query Stage #4 (4 -> 4): -ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: o_orderdate@2 >= 1995-01-01 AND o_orderdate@2 <= 1996-12-31 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_0.parquet:0..13941205, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:0..13831262], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:13831262..13895674, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_2.parquet:0..14344446, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:0..13363609], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:13363609..14369581, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..26766495], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:26766495..27275418, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..27263542]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1995-01-01 AND o_orderdate@4 <= 1996-12-31, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1995-01-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 <= 1996-12-31 END, required_guarantees=[] - -Query Stage #5 (4 -> 4): -ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] - -Query Stage #6 (4 -> 4): -ShuffleWriterExec(stage_id=6, output_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) - ProjectionExec: expr=[p_partkey@0 as p_partkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: p_type@1 = LARGE PLATED STEEL - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet:0..3454243, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet:0..61547], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet:61547..3452413, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_0.parquet:0..124924], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_0.parquet:124924..2056321, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_1.parquet:0..1526046, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_2.parquet:0..58347], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_2.parquet:58347..1788444, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_3.parquet:0..1785690]]}, projection=[p_partkey, p_type], predicate=p_type@4 = LARGE PLATED STEEL, pruning_predicate=CASE WHEN p_type_null_count@2 = p_type_row_count@3 THEN false ELSE p_type_min@0 <= LARGE PLATED STEEL AND LARGE PLATED STEEL <= p_type_max@1 END, required_guarantees=[p_type in (LARGE PLATED STEEL)] - -Query Stage #7 (4 -> 4): -ShuffleWriterExec(stage_id=7, 
output_partitioning=Hash([Column { name: "l_partkey", index: 1 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_orderkey, l_partkey, l_suppkey, l_extendedprice, l_discount] - -Query Stage #8 (4 -> 4): -ShuffleWriterExec(stage_id=8, output_partitioning=Hash([Column { name: "l_suppkey", index: 1 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, l_partkey@1)], projection=[l_orderkey@1, l_suppkey@3, l_extendedprice@4, l_discount@5] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=6, input_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=7, input_partitioning=Hash([Column { name: "l_partkey", index: 1 }], 4)) - -Query Stage #9 (4 -> 4): -ShuffleWriterExec(stage_id=9, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) - ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_extendedprice@2 as 
l_extendedprice, l_discount@3 as l_discount, s_nationkey@0 as s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1)], projection=[s_nationkey@1, l_orderkey@2, l_extendedprice@4, l_discount@5] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=5, input_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=8, input_partitioning=Hash([Column { name: "l_suppkey", index: 1 }], 4)) - -Query Stage #10 (4 -> 4): -ShuffleWriterExec(stage_id=10, output_partitioning=Hash([Column { name: "o_custkey", index: 3 }], 4)) - ProjectionExec: expr=[l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@4 as s_nationkey, o_custkey@0 as o_custkey, o_orderdate@1 as o_orderdate] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_custkey@1, o_orderdate@2, l_extendedprice@4, l_discount@5, s_nationkey@6] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=4, input_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=9, input_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) - -Query Stage #11 (4 -> 4): -ShuffleWriterExec(stage_id=11, output_partitioning=Hash([Column { name: "c_nationkey", index: 4 }], 4)) - ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@3 as s_nationkey, o_orderdate@4 as o_orderdate, c_nationkey@0 as c_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@3)], projection=[c_nationkey@1, l_extendedprice@2, l_discount@3, s_nationkey@4, o_orderdate@6] - CoalesceBatchesExec: target_batch_size=8192 - 
ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=10, input_partitioning=Hash([Column { name: "o_custkey", index: 3 }], 4)) - -Query Stage #12 (4 -> 4): -ShuffleWriterExec(stage_id=12, output_partitioning=Hash([Column { name: "s_nationkey", index: 2 }], 4)) - ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@3 as s_nationkey, o_orderdate@4 as o_orderdate, n_regionkey@0 as n_regionkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, c_nationkey@4)], projection=[n_regionkey@1, l_extendedprice@2, l_discount@3, s_nationkey@4, o_orderdate@5] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=11, input_partitioning=Hash([Column { name: "c_nationkey", index: 4 }], 4)) - -Query Stage #13 (4 -> 4): -ShuffleWriterExec(stage_id=13, output_partitioning=Hash([Column { name: "n_regionkey", index: 3 }], 4)) - ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, o_orderdate@3 as o_orderdate, n_regionkey@4 as n_regionkey, n_name@0 as n_name] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[n_name@1, l_extendedprice@2, l_discount@3, o_orderdate@5, n_regionkey@6] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=12, input_partitioning=Hash([Column { name: "s_nationkey", index: 2 }], 4)) - -Query Stage #14 (4 -> 4): -ShuffleWriterExec(stage_id=14, 
output_partitioning=Hash([Column { name: "o_year", index: 0 }], 4)) - AggregateExec: mode=Partial, gby=[o_year@0 as o_year], aggr=[sum(CASE WHEN all_nations.nation = Utf8("IRAQ") THEN all_nations.volume ELSE Int64(0) END), sum(all_nations.volume)] - ProjectionExec: expr=[date_part(YEAR, o_orderdate@2) as o_year, l_extendedprice@0 * (Some(1),20,0 - l_discount@1) as volume, n_name@3 as nation] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(r_regionkey@0, n_regionkey@3)], projection=[l_extendedprice@1, l_discount@2, o_orderdate@3, n_name@5] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "r_regionkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=13, input_partitioning=Hash([Column { name: "n_regionkey", index: 3 }], 4)) - -Query Stage #15 (4 -> 4): -ShuffleWriterExec(stage_id=15, output_partitioning=Hash([Column { name: "o_year", index: 0 }], 4)) - SortExec: expr=[o_year@0 ASC NULLS LAST], preserve_partitioning=[true] - ProjectionExec: expr=[o_year@0 as o_year, sum(CASE WHEN all_nations.nation = Utf8("IRAQ") THEN all_nations.volume ELSE Int64(0) END)@1 / sum(all_nations.volume)@2 as mkt_share] - AggregateExec: mode=FinalPartitioned, gby=[o_year@0 as o_year], aggr=[sum(CASE WHEN all_nations.nation = Utf8("IRAQ") THEN all_nations.volume ELSE Int64(0) END), sum(all_nations.volume)] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=14, input_partitioning=Hash([Column { name: "o_year", index: 0 }], 4)) - -Query Stage #16 (4 -> 1): -SortPreservingMergeExec: [o_year@0 ASC NULLS LAST] - ShuffleReaderExec(stage_id=15, input_partitioning=Hash([Column { name: "o_year", index: 0 }], 4)) - diff --git a/testdata/expected-plans/q9.txt b/testdata/expected-plans/q9.txt deleted file mode 100644 index 0a14b3a..0000000 --- a/testdata/expected-plans/q9.txt +++ /dev/null @@ -1,172 +0,0 @@ 
-DataFusion Logical Plan -======================= - -Sort: profit.nation ASC NULLS LAST, profit.o_year DESC NULLS FIRST - Projection: profit.nation, profit.o_year, sum(profit.amount) AS sum_profit - Aggregate: groupBy=[[profit.nation, profit.o_year]], aggr=[[sum(profit.amount)]] - SubqueryAlias: profit - Projection: nation.n_name AS nation, date_part(Utf8("YEAR"), orders.o_orderdate) AS o_year, lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - lineitem.l_discount) - partsupp.ps_supplycost * lineitem.l_quantity AS amount - Inner Join: supplier.s_nationkey = nation.n_nationkey - Projection: lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount, supplier.s_nationkey, partsupp.ps_supplycost, orders.o_orderdate - Inner Join: lineitem.l_orderkey = orders.o_orderkey - Projection: lineitem.l_orderkey, lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount, supplier.s_nationkey, partsupp.ps_supplycost - Inner Join: lineitem.l_suppkey = partsupp.ps_suppkey, lineitem.l_partkey = partsupp.ps_partkey - Projection: lineitem.l_orderkey, lineitem.l_partkey, lineitem.l_suppkey, lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount, supplier.s_nationkey - Inner Join: lineitem.l_suppkey = supplier.s_suppkey - Projection: lineitem.l_orderkey, lineitem.l_partkey, lineitem.l_suppkey, lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount - Inner Join: part.p_partkey = lineitem.l_partkey - Projection: part.p_partkey - Filter: part.p_name LIKE Utf8("%moccasin%") - TableScan: part projection=[p_partkey, p_name], partial_filters=[part.p_name LIKE Utf8("%moccasin%")] - TableScan: lineitem projection=[l_orderkey, l_partkey, l_suppkey, l_quantity, l_extendedprice, l_discount] - TableScan: supplier projection=[s_suppkey, s_nationkey] - TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_supplycost] - TableScan: orders projection=[o_orderkey, o_orderdate] - TableScan: nation projection=[n_nationkey, n_name] - -DataFusion Physical Plan 
-======================== - -SortPreservingMergeExec: [nation@0 ASC NULLS LAST,o_year@1 DESC] - SortExec: expr=[nation@0 ASC NULLS LAST,o_year@1 DESC], preserve_partitioning=[true] - ProjectionExec: expr=[nation@0 as nation, o_year@1 as o_year, sum(profit.amount)@2 as sum_profit] - AggregateExec: mode=FinalPartitioned, gby=[nation@0 as nation, o_year@1 as o_year], aggr=[sum(profit.amount)] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([nation@0, o_year@1], 4), input_partitions=4 - AggregateExec: mode=Partial, gby=[nation@0 as nation, o_year@1 as o_year], aggr=[sum(profit.amount)] - ProjectionExec: expr=[n_name@0 as nation, date_part(YEAR, o_orderdate@5) as o_year, l_extendedprice@2 * (Some(1),20,0 - l_discount@3) - ps_supplycost@4 * l_quantity@1 as amount] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@3)], projection=[n_name@1, l_quantity@2, l_extendedprice@3, l_discount@4, ps_supplycost@6, o_orderdate@7] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=2 - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/IftthSxZMytggJzA_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([s_nationkey@3], 4), input_partitions=4 - ProjectionExec: expr=[l_quantity@1 as l_quantity, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@4 as s_nationkey, ps_supplycost@5 as ps_supplycost, o_orderdate@0 as o_orderdate] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_orderdate@1, l_quantity@3, l_extendedprice@4, l_discount@5, s_nationkey@6, ps_supplycost@7] - 
CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([o_orderkey@0], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_0.parquet:0..13941205, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:0..13831262], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:13831262..13895674, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_2.parquet:0..14344446, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:0..13363609], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:13363609..14369581, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..26766495], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:26766495..27275418, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..27263542]]}, projection=[o_orderkey, o_orderdate] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4 - ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_quantity@2 as l_quantity, l_extendedprice@3 as l_extendedprice, l_discount@4 as l_discount, s_nationkey@5 as s_nationkey, ps_supplycost@0 as ps_supplycost] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(ps_suppkey@1, l_suppkey@2), (ps_partkey@0, l_partkey@1)], projection=[ps_supplycost@2, l_orderkey@3, l_quantity@6, l_extendedprice@7, l_discount@8, s_nationkey@9] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([ps_suppkey@1, ps_partkey@0], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_0.parquet:0..11328263, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_1.parquet:0..9740046], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_1.parquet:9740046..10585781, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_2.parquet:0..9946761, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_3.parquet:0..10275813], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_3.parquet:10275813..10620507, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..20723615], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:20723615..20895812, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..20896109]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([l_suppkey@2, l_partkey@1], 4), input_partitions=4 - ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_partkey@2 as l_partkey, l_suppkey@3 as l_suppkey, l_quantity@4 as l_quantity, l_extendedprice@5 as l_extendedprice, l_discount@6 as l_discount, s_nationkey@0 as s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, l_suppkey@2)], projection=[s_nationkey@1, l_orderkey@2, l_partkey@3, l_suppkey@4, l_quantity@5, l_extendedprice@6, l_discount@7] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_1.parquet], 
[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([l_suppkey@2], 4), input_partitions=4 - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, l_partkey@1)], projection=[l_orderkey@1, l_partkey@2, l_suppkey@3, l_quantity@4, l_extendedprice@5, l_discount@6] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([p_partkey@0], 4), input_partitions=4 - ProjectionExec: expr=[p_partkey@0 as p_partkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: p_name@1 LIKE %moccasin% - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet:0..3454243, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet:0..61547], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet:61547..3452413, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_0.parquet:0..124924], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_0.parquet:124924..2056321, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_1.parquet:0..1526046, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_2.parquet:0..58347], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_2.parquet:58347..1788444, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_3.parquet:0..1785690]]}, projection=[p_partkey, p_name], predicate=p_name@1 LIKE %moccasin% - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([l_partkey@1], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_orderkey, l_partkey, l_suppkey, l_quantity, l_extendedprice, l_discount] - -RaySQL Plan -=========== - -Query Stage #0 (2 -> 4): -ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/IftthSxZMytggJzA_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name] - -Query Stage #1 (4 -> 4): -ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_0.parquet:0..13941205, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:0..13831262], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_1.parquet:13831262..13895674, 
home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_2.parquet:0..14344446, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:0..13363609], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/2Wpvnmd29krZIYjA_3.parquet:13363609..14369581, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..26766495], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:26766495..27275418, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..27263542]]}, projection=[o_orderkey, o_orderdate] - -Query Stage #2 (4 -> 4): -ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "ps_suppkey", index: 1 }, Column { name: "ps_partkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_0.parquet:0..11328263, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_1.parquet:0..9740046], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_1.parquet:9740046..10585781, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_2.parquet:0..9946761, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_3.parquet:0..10275813], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/i4vwrw2iQLnJna7E_3.parquet:10275813..10620507, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..20723615], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:20723615..20895812, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..20896109]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] - -Query Stage #3 (4 -> 4): -ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: 
"s_suppkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_0.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/8qdC2hv8M5cVyFo2_1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] - -Query Stage #4 (4 -> 4): -ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) - ProjectionExec: expr=[p_partkey@0 as p_partkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: p_name@1 LIKE %moccasin% - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet:0..3454243, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet:0..61547], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet:61547..3452413, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_0.parquet:0..124924], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_0.parquet:124924..2056321, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_1.parquet:0..1526046, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_2.parquet:0..58347], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_2.parquet:58347..1788444, home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/zqREsW7hAwjnDl0s_3.parquet:0..1785690]]}, projection=[p_partkey, p_name], predicate=p_name@1 LIKE %moccasin% - -Query Stage #5 (4 -> 4): -ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "l_partkey", index: 1 }], 4)) - ParquetExec: file_groups={4 groups: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_0.parquet:0..52711469, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:0..51867435], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_1.parquet:51867435..52551505, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_2.parquet:0..52295377, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:0..51599457], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/Q6UwsIXxM7id3Gm2_3.parquet:51599457..52418452, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..103759909], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:103759909..104163524, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..104175286]]}, projection=[l_orderkey, l_partkey, l_suppkey, l_quantity, l_extendedprice, l_discount] - -Query Stage #6 (4 -> 4): -ShuffleWriterExec(stage_id=6, output_partitioning=Hash([Column { name: "l_suppkey", index: 2 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, l_partkey@1)], projection=[l_orderkey@1, l_partkey@2, l_suppkey@3, l_quantity@4, l_extendedprice@5, l_discount@6] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=4, input_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=5, input_partitioning=Hash([Column { name: "l_partkey", index: 1 }], 4)) - -Query Stage #7 (4 -> 4): -ShuffleWriterExec(stage_id=7, output_partitioning=Hash([Column { name: "l_suppkey", index: 2 }, Column { name: "l_partkey", index: 1 }], 4)) - ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_partkey@2 as l_partkey, l_suppkey@3 as l_suppkey, 
l_quantity@4 as l_quantity, l_extendedprice@5 as l_extendedprice, l_discount@6 as l_discount, s_nationkey@0 as s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, l_suppkey@2)], projection=[s_nationkey@1, l_orderkey@2, l_partkey@3, l_suppkey@4, l_quantity@5, l_extendedprice@6, l_discount@7] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=6, input_partitioning=Hash([Column { name: "l_suppkey", index: 2 }], 4)) - -Query Stage #8 (4 -> 4): -ShuffleWriterExec(stage_id=8, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) - ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_quantity@2 as l_quantity, l_extendedprice@3 as l_extendedprice, l_discount@4 as l_discount, s_nationkey@5 as s_nationkey, ps_supplycost@0 as ps_supplycost] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(ps_suppkey@1, l_suppkey@2), (ps_partkey@0, l_partkey@1)], projection=[ps_supplycost@2, l_orderkey@3, l_quantity@6, l_extendedprice@7, l_discount@8, s_nationkey@9] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "ps_suppkey", index: 1 }, Column { name: "ps_partkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=7, input_partitioning=Hash([Column { name: "l_suppkey", index: 2 }, Column { name: "l_partkey", index: 1 }], 4)) - -Query Stage #9 (4 -> 4): -ShuffleWriterExec(stage_id=9, output_partitioning=Hash([Column { name: "s_nationkey", index: 3 }], 4)) - ProjectionExec: expr=[l_quantity@1 as l_quantity, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@4 as s_nationkey, ps_supplycost@5 as ps_supplycost, o_orderdate@0 as o_orderdate] - 
CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_orderdate@1, l_quantity@3, l_extendedprice@4, l_discount@5, s_nationkey@6, ps_supplycost@7] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=8, input_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) - -Query Stage #10 (4 -> 4): -ShuffleWriterExec(stage_id=10, output_partitioning=Hash([Column { name: "nation", index: 0 }, Column { name: "o_year", index: 1 }], 4)) - AggregateExec: mode=Partial, gby=[nation@0 as nation, o_year@1 as o_year], aggr=[sum(profit.amount)] - ProjectionExec: expr=[n_name@0 as nation, date_part(YEAR, o_orderdate@5) as o_year, l_extendedprice@2 * (Some(1),20,0 - l_discount@3) - ps_supplycost@4 * l_quantity@1 as amount] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@3)], projection=[n_name@1, l_quantity@2, l_extendedprice@3, l_discount@4, ps_supplycost@6, o_orderdate@7] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=9, input_partitioning=Hash([Column { name: "s_nationkey", index: 3 }], 4)) - -Query Stage #11 (4 -> 4): -ShuffleWriterExec(stage_id=11, output_partitioning=Hash([Column { name: "nation", index: 0 }, Column { name: "o_year", index: 1 }], 4)) - SortExec: expr=[nation@0 ASC NULLS LAST,o_year@1 DESC], preserve_partitioning=[true] - ProjectionExec: expr=[nation@0 as nation, o_year@1 as o_year, sum(profit.amount)@2 as sum_profit] - AggregateExec: mode=FinalPartitioned, gby=[nation@0 as nation, o_year@1 as o_year], aggr=[sum(profit.amount)] - 
CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=10, input_partitioning=Hash([Column { name: "nation", index: 0 }, Column { name: "o_year", index: 1 }], 4)) - -Query Stage #12 (4 -> 1): -SortPreservingMergeExec: [nation@0 ASC NULLS LAST,o_year@1 DESC] - ShuffleReaderExec(stage_id=11, input_partitioning=Hash([Column { name: "nation", index: 0 }, Column { name: "o_year", index: 1 }], 4)) - From b9f6148f8e21652518d680a21d6701fb8f6c7b14 Mon Sep 17 00:00:00 2001 From: Edmondo Porcu Date: Thu, 3 Oct 2024 14:56:33 -0400 Subject: [PATCH 05/14] Trying one partition only --- src/planner.rs | 2 +- testdata/expected-plans/q1.txt | 33 ++++++++ testdata/expected-plans/q10.txt | 71 +++++++++++++++++ testdata/expected-plans/q11.txt | 94 ++++++++++++++++++++++ testdata/expected-plans/q12.txt | 44 +++++++++++ testdata/expected-plans/q13.txt | 49 ++++++++++++ testdata/expected-plans/q14.txt | 41 ++++++++++ testdata/expected-plans/q16.txt | 61 +++++++++++++++ testdata/expected-plans/q17.txt | 57 ++++++++++++++ testdata/expected-plans/q18.txt | 61 +++++++++++++++ testdata/expected-plans/q19.txt | 44 +++++++++++ testdata/expected-plans/q2.txt | 133 ++++++++++++++++++++++++++++++++ testdata/expected-plans/q20.txt | 87 +++++++++++++++++++++ testdata/expected-plans/q21.txt | 107 +++++++++++++++++++++++++ testdata/expected-plans/q22.txt | 62 +++++++++++++++ testdata/expected-plans/q3.txt | 65 ++++++++++++++++ testdata/expected-plans/q4.txt | 51 ++++++++++++ testdata/expected-plans/q5.txt | 90 +++++++++++++++++++++ testdata/expected-plans/q6.txt | 30 +++++++ testdata/expected-plans/q7.txt | 94 ++++++++++++++++++++++ testdata/expected-plans/q8.txt | 124 +++++++++++++++++++++++++++++ testdata/expected-plans/q9.txt | 87 +++++++++++++++++++++ 22 files changed, 1486 insertions(+), 1 deletion(-) create mode 100644 testdata/expected-plans/q1.txt create mode 100644 testdata/expected-plans/q10.txt create mode 100644 testdata/expected-plans/q11.txt create mode 100644 
testdata/expected-plans/q12.txt create mode 100644 testdata/expected-plans/q13.txt create mode 100644 testdata/expected-plans/q14.txt create mode 100644 testdata/expected-plans/q16.txt create mode 100644 testdata/expected-plans/q17.txt create mode 100644 testdata/expected-plans/q18.txt create mode 100644 testdata/expected-plans/q19.txt create mode 100644 testdata/expected-plans/q2.txt create mode 100644 testdata/expected-plans/q20.txt create mode 100644 testdata/expected-plans/q21.txt create mode 100644 testdata/expected-plans/q22.txt create mode 100644 testdata/expected-plans/q3.txt create mode 100644 testdata/expected-plans/q4.txt create mode 100644 testdata/expected-plans/q5.txt create mode 100644 testdata/expected-plans/q6.txt create mode 100644 testdata/expected-plans/q7.txt create mode 100644 testdata/expected-plans/q8.txt create mode 100644 testdata/expected-plans/q9.txt diff --git a/src/planner.rs b/src/planner.rs index 5c58663..40934fe 100644 --- a/src/planner.rs +++ b/src/planner.rs @@ -384,7 +384,7 @@ mod test { let data_path = env::var("TPCH_DATA_PATH")?; let file = format!("testdata/queries/q{n}.sql"); let sql = fs::read_to_string(&file)?; - let config = SessionConfig::new().with_target_partitions(4); + let config = SessionConfig::new().with_target_partitions(1); let ctx = SessionContext::with_config(config); let tables = &[ "customer", "lineitem", "nation", "orders", "part", "partsupp", "region", "supplier", diff --git a/testdata/expected-plans/q1.txt b/testdata/expected-plans/q1.txt new file mode 100644 index 0000000..c5b6928 --- /dev/null +++ b/testdata/expected-plans/q1.txt @@ -0,0 +1,33 @@ +DataFusion Logical Plan +======================= + +Sort: lineitem.l_returnflag ASC NULLS LAST, lineitem.l_linestatus ASC NULLS LAST + Projection: lineitem.l_returnflag, lineitem.l_linestatus, sum(lineitem.l_quantity) AS sum_qty, sum(lineitem.l_extendedprice) AS sum_base_price, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS sum_disc_price, 
sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax) AS sum_charge, avg(lineitem.l_quantity) AS avg_qty, avg(lineitem.l_extendedprice) AS avg_price, avg(lineitem.l_discount) AS avg_disc, count(*) AS count_order + Aggregate: groupBy=[[lineitem.l_returnflag, lineitem.l_linestatus]], aggr=[[sum(lineitem.l_quantity), sum(lineitem.l_extendedprice), sum(__common_expr_1) AS sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount), sum(__common_expr_1 * (Decimal128(Some(1),20,0) + lineitem.l_tax)) AS sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax), avg(lineitem.l_quantity), avg(lineitem.l_extendedprice), avg(lineitem.l_discount), count(Int64(1)) AS count(*)]] + Projection: lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - lineitem.l_discount) AS __common_expr_1, lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_tax, lineitem.l_returnflag, lineitem.l_linestatus + Filter: lineitem.l_shipdate <= Date32("1998-09-24") + TableScan: lineitem projection=[l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate], partial_filters=[lineitem.l_shipdate <= Date32("1998-09-24")] + +DataFusion Physical Plan +======================== + +SortExec: expr=[l_returnflag@0 ASC NULLS LAST,l_linestatus@1 ASC NULLS LAST], preserve_partitioning=[false] + ProjectionExec: expr=[l_returnflag@0 as l_returnflag, l_linestatus@1 as l_linestatus, sum(lineitem.l_quantity)@2 as sum_qty, sum(lineitem.l_extendedprice)@3 as sum_base_price, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@4 as sum_disc_price, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax)@5 as sum_charge, avg(lineitem.l_quantity)@6 as avg_qty, avg(lineitem.l_extendedprice)@7 as avg_price, avg(lineitem.l_discount)@8 as avg_disc, count(*)@9 as count_order] + AggregateExec: mode=Single, gby=[l_returnflag@5 as l_returnflag, l_linestatus@6 as 
l_linestatus], aggr=[sum(lineitem.l_quantity), sum(lineitem.l_extendedprice), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax), avg(lineitem.l_quantity), avg(lineitem.l_extendedprice), avg(lineitem.l_discount), count(*)] + ProjectionExec: expr=[l_extendedprice@1 * (Some(1),20,0 - l_discount@2) as __common_expr_2, l_quantity@0 as l_quantity, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, l_tax@3 as l_tax, l_returnflag@4 as l_returnflag, l_linestatus@5 as l_linestatus] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: l_shipdate@6 <= 1998-09-24 + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate], predicate=l_shipdate@10 <= 1998-09-24, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@0 <= 1998-09-24 END, required_guarantees=[] + +RaySQL Plan +=========== + +Query Stage #0 (1 -> 1): +SortExec: expr=[l_returnflag@0 ASC NULLS LAST,l_linestatus@1 ASC NULLS LAST], preserve_partitioning=[false] + ProjectionExec: expr=[l_returnflag@0 as l_returnflag, l_linestatus@1 as l_linestatus, sum(lineitem.l_quantity)@2 as sum_qty, sum(lineitem.l_extendedprice)@3 as sum_base_price, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@4 as sum_disc_price, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax)@5 as sum_charge, avg(lineitem.l_quantity)@6 as avg_qty, avg(lineitem.l_extendedprice)@7 as avg_price, avg(lineitem.l_discount)@8 as avg_disc, count(*)@9 as count_order] + AggregateExec: mode=Single, gby=[l_returnflag@5 as l_returnflag, l_linestatus@6 as l_linestatus], aggr=[sum(lineitem.l_quantity), sum(lineitem.l_extendedprice), sum(lineitem.l_extendedprice * Int64(1) - 
lineitem.l_discount), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax), avg(lineitem.l_quantity), avg(lineitem.l_extendedprice), avg(lineitem.l_discount), count(*)] + ProjectionExec: expr=[l_extendedprice@1 * (Some(1),20,0 - l_discount@2) as __common_expr_2, l_quantity@0 as l_quantity, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, l_tax@3 as l_tax, l_returnflag@4 as l_returnflag, l_linestatus@5 as l_linestatus] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: l_shipdate@6 <= 1998-09-24 + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate], predicate=l_shipdate@10 <= 1998-09-24, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@0 <= 1998-09-24 END, required_guarantees=[] + diff --git a/testdata/expected-plans/q10.txt b/testdata/expected-plans/q10.txt new file mode 100644 index 0000000..0e84a0d --- /dev/null +++ b/testdata/expected-plans/q10.txt @@ -0,0 +1,71 @@ +DataFusion Logical Plan +======================= + +Limit: skip=0, fetch=20 + Sort: revenue DESC NULLS FIRST, fetch=20 + Projection: customer.c_custkey, customer.c_name, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS revenue, customer.c_acctbal, nation.n_name, customer.c_address, customer.c_phone, customer.c_comment + Aggregate: groupBy=[[customer.c_custkey, customer.c_name, customer.c_acctbal, customer.c_phone, nation.n_name, customer.c_address, customer.c_comment]], aggr=[[sum(lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - lineitem.l_discount)) AS sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]] + Projection: customer.c_custkey, customer.c_name, customer.c_address, customer.c_phone, customer.c_acctbal, customer.c_comment, lineitem.l_extendedprice, lineitem.l_discount, 
nation.n_name + Inner Join: customer.c_nationkey = nation.n_nationkey + Projection: customer.c_custkey, customer.c_name, customer.c_address, customer.c_nationkey, customer.c_phone, customer.c_acctbal, customer.c_comment, lineitem.l_extendedprice, lineitem.l_discount + Inner Join: orders.o_orderkey = lineitem.l_orderkey + Projection: customer.c_custkey, customer.c_name, customer.c_address, customer.c_nationkey, customer.c_phone, customer.c_acctbal, customer.c_comment, orders.o_orderkey + Inner Join: customer.c_custkey = orders.o_custkey + TableScan: customer projection=[c_custkey, c_name, c_address, c_nationkey, c_phone, c_acctbal, c_comment] + Projection: orders.o_orderkey, orders.o_custkey + Filter: orders.o_orderdate >= Date32("1993-07-01") AND orders.o_orderdate < Date32("1993-10-01") + TableScan: orders projection=[o_orderkey, o_custkey, o_orderdate], partial_filters=[orders.o_orderdate >= Date32("1993-07-01"), orders.o_orderdate < Date32("1993-10-01")] + Projection: lineitem.l_orderkey, lineitem.l_extendedprice, lineitem.l_discount + Filter: lineitem.l_returnflag = Utf8("R") + TableScan: lineitem projection=[l_orderkey, l_extendedprice, l_discount, l_returnflag], partial_filters=[lineitem.l_returnflag = Utf8("R")] + TableScan: nation projection=[n_nationkey, n_name] + +DataFusion Physical Plan +======================== + +SortExec: TopK(fetch=20), expr=[revenue@2 DESC], preserve_partitioning=[false] + ProjectionExec: expr=[c_custkey@0 as c_custkey, c_name@1 as c_name, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@7 as revenue, c_acctbal@2 as c_acctbal, n_name@4 as n_name, c_address@5 as c_address, c_phone@3 as c_phone, c_comment@6 as c_comment] + AggregateExec: mode=Single, gby=[c_custkey@0 as c_custkey, c_name@1 as c_name, c_acctbal@4 as c_acctbal, c_phone@3 as c_phone, n_name@8 as n_name, c_address@2 as c_address, c_comment@5 as c_comment], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] + ProjectionExec: 
expr=[c_custkey@1 as c_custkey, c_name@2 as c_name, c_address@3 as c_address, c_phone@4 as c_phone, c_acctbal@5 as c_acctbal, c_comment@6 as c_comment, l_extendedprice@7 as l_extendedprice, l_discount@8 as l_discount, n_name@0 as n_name] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, c_nationkey@3)], projection=[n_name@1, c_custkey@2, c_name@3, c_address@4, c_phone@6, c_acctbal@7, c_comment@8, l_extendedprice@9, l_discount@10] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(o_orderkey@7, l_orderkey@0)], projection=[c_custkey@0, c_name@1, c_address@2, c_nationkey@3, c_phone@4, c_acctbal@5, c_comment@6, l_extendedprice@9, l_discount@10] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, c_name@1, c_address@2, c_nationkey@3, c_phone@4, c_acctbal@5, c_comment@6, o_orderkey@7] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13210681]]}, projection=[c_custkey, c_name, c_address, c_nationkey, c_phone, c_acctbal, c_comment] + ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: o_orderdate@2 >= 1993-07-01 AND o_orderdate@2 < 1993-10-01 + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54533415]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1993-07-01 AND o_orderdate@4 < 1993-10-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1993-07-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 
THEN false ELSE o_orderdate_min@3 < 1993-10-01 END, required_guarantees=[] + ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: l_returnflag@3 = R + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_orderkey, l_extendedprice, l_discount, l_returnflag], predicate=l_returnflag@8 = R, pruning_predicate=CASE WHEN l_returnflag_null_count@2 = l_returnflag_row_count@3 THEN false ELSE l_returnflag_min@0 <= R AND R <= l_returnflag_max@1 END, required_guarantees=[l_returnflag in (R)] + +RaySQL Plan +=========== + +Query Stage #0 (1 -> 1): +SortExec: TopK(fetch=20), expr=[revenue@2 DESC], preserve_partitioning=[false] + ProjectionExec: expr=[c_custkey@0 as c_custkey, c_name@1 as c_name, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@7 as revenue, c_acctbal@2 as c_acctbal, n_name@4 as n_name, c_address@5 as c_address, c_phone@3 as c_phone, c_comment@6 as c_comment] + AggregateExec: mode=Single, gby=[c_custkey@0 as c_custkey, c_name@1 as c_name, c_acctbal@4 as c_acctbal, c_phone@3 as c_phone, n_name@8 as n_name, c_address@2 as c_address, c_comment@5 as c_comment], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] + ProjectionExec: expr=[c_custkey@1 as c_custkey, c_name@2 as c_name, c_address@3 as c_address, c_phone@4 as c_phone, c_acctbal@5 as c_acctbal, c_comment@6 as c_comment, l_extendedprice@7 as l_extendedprice, l_discount@8 as l_discount, n_name@0 as n_name] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, c_nationkey@3)], projection=[n_name@1, c_custkey@2, c_name@3, c_address@4, c_phone@6, c_acctbal@7, c_comment@8, l_extendedprice@9, l_discount@10] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, 
projection=[n_nationkey, n_name] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(o_orderkey@7, l_orderkey@0)], projection=[c_custkey@0, c_name@1, c_address@2, c_nationkey@3, c_phone@4, c_acctbal@5, c_comment@6, l_extendedprice@9, l_discount@10] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, c_name@1, c_address@2, c_nationkey@3, c_phone@4, c_acctbal@5, c_comment@6, o_orderkey@7] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13210681]]}, projection=[c_custkey, c_name, c_address, c_nationkey, c_phone, c_acctbal, c_comment] + ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: o_orderdate@2 >= 1993-07-01 AND o_orderdate@2 < 1993-10-01 + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54533415]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1993-07-01 AND o_orderdate@4 < 1993-10-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1993-07-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1993-10-01 END, required_guarantees=[] + ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: l_returnflag@3 = R + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_orderkey, l_extendedprice, l_discount, l_returnflag], predicate=l_returnflag@8 = R, pruning_predicate=CASE WHEN l_returnflag_null_count@2 = l_returnflag_row_count@3 THEN false ELSE l_returnflag_min@0 
<= R AND R <= l_returnflag_max@1 END, required_guarantees=[l_returnflag in (R)] + diff --git a/testdata/expected-plans/q11.txt b/testdata/expected-plans/q11.txt new file mode 100644 index 0000000..8be5c18 --- /dev/null +++ b/testdata/expected-plans/q11.txt @@ -0,0 +1,94 @@ +DataFusion Logical Plan +======================= + +Sort: value DESC NULLS FIRST + Projection: partsupp.ps_partkey, sum(partsupp.ps_supplycost * partsupp.ps_availqty) AS value + Inner Join: Filter: CAST(sum(partsupp.ps_supplycost * partsupp.ps_availqty) AS Decimal128(38, 15)) > __scalar_sq_1.sum(partsupp.ps_supplycost * partsupp.ps_availqty) * Float64(0.0001) + Aggregate: groupBy=[[partsupp.ps_partkey]], aggr=[[sum(partsupp.ps_supplycost * CAST(partsupp.ps_availqty AS Decimal128(10, 0)))]] + Projection: partsupp.ps_partkey, partsupp.ps_availqty, partsupp.ps_supplycost + Inner Join: supplier.s_nationkey = nation.n_nationkey + Projection: partsupp.ps_partkey, partsupp.ps_availqty, partsupp.ps_supplycost, supplier.s_nationkey + Inner Join: partsupp.ps_suppkey = supplier.s_suppkey + TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_availqty, ps_supplycost] + TableScan: supplier projection=[s_suppkey, s_nationkey] + Projection: nation.n_nationkey + Filter: nation.n_name = Utf8("ALGERIA") + TableScan: nation projection=[n_nationkey, n_name], partial_filters=[nation.n_name = Utf8("ALGERIA")] + SubqueryAlias: __scalar_sq_1 + Projection: CAST(CAST(sum(partsupp.ps_supplycost * partsupp.ps_availqty) AS Float64) * Float64(0.0001) AS Decimal128(38, 15)) + Aggregate: groupBy=[[]], aggr=[[sum(partsupp.ps_supplycost * CAST(partsupp.ps_availqty AS Decimal128(10, 0)))]] + Projection: partsupp.ps_availqty, partsupp.ps_supplycost + Inner Join: supplier.s_nationkey = nation.n_nationkey + Projection: partsupp.ps_availqty, partsupp.ps_supplycost, supplier.s_nationkey + Inner Join: partsupp.ps_suppkey = supplier.s_suppkey + TableScan: partsupp projection=[ps_suppkey, ps_availqty, ps_supplycost] + TableScan: 
supplier projection=[s_suppkey, s_nationkey] + Projection: nation.n_nationkey + Filter: nation.n_name = Utf8("ALGERIA") + TableScan: nation projection=[n_nationkey, n_name], partial_filters=[nation.n_name = Utf8("ALGERIA")] + +DataFusion Physical Plan +======================== + +SortExec: expr=[value@1 DESC], preserve_partitioning=[false] + ProjectionExec: expr=[ps_partkey@1 as ps_partkey, sum(partsupp.ps_supplycost * partsupp.ps_availqty)@2 as value] + NestedLoopJoinExec: join_type=Inner, filter=CAST(sum(partsupp.ps_supplycost * partsupp.ps_availqty)@0 AS Decimal128(38, 15)) > sum(partsupp.ps_supplycost * partsupp.ps_availqty) * Float64(0.0001)@1 + ProjectionExec: expr=[CAST(CAST(sum(partsupp.ps_supplycost * partsupp.ps_availqty)@0 AS Float64) * 0.0001 AS Decimal128(38, 15)) as sum(partsupp.ps_supplycost * partsupp.ps_availqty) * Float64(0.0001)] + AggregateExec: mode=Single, gby=[], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[ps_availqty@1, ps_supplycost@2] + ProjectionExec: expr=[n_nationkey@0 as n_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: n_name@1 = ALGERIA + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ALGERIA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ALGERIA AND ALGERIA <= n_name_max@1 END, required_guarantees=[n_name in (ALGERIA)] + ProjectionExec: expr=[ps_availqty@1 as ps_availqty, ps_supplycost@2 as ps_supplycost, s_nationkey@0 as s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@0)], projection=[s_nationkey@1, ps_availqty@3, ps_supplycost@4] + ParquetExec: file_groups={1 group: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet:0..41093580]]}, projection=[ps_suppkey, ps_availqty, ps_supplycost] + AggregateExec: mode=Single, gby=[ps_partkey@0 as ps_partkey], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@3)], projection=[ps_partkey@1, ps_availqty@2, ps_supplycost@3] + ProjectionExec: expr=[n_nationkey@0 as n_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: n_name@1 = ALGERIA + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ALGERIA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ALGERIA AND ALGERIA <= n_name_max@1 END, required_guarantees=[n_name in (ALGERIA)] + ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_availqty@2 as ps_availqty, ps_supplycost@3 as ps_supplycost, s_nationkey@0 as s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@1)], projection=[s_nationkey@1, ps_partkey@2, ps_availqty@4, ps_supplycost@5] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet:0..41093580]]}, projection=[ps_partkey, ps_suppkey, ps_availqty, ps_supplycost] + +RaySQL Plan +=========== + +Query Stage #0 (1 -> 1): +SortExec: expr=[value@1 DESC], preserve_partitioning=[false] + ProjectionExec: expr=[ps_partkey@1 as ps_partkey, sum(partsupp.ps_supplycost * 
partsupp.ps_availqty)@2 as value] + NestedLoopJoinExec: join_type=Inner, filter=CAST(sum(partsupp.ps_supplycost * partsupp.ps_availqty)@0 AS Decimal128(38, 15)) > sum(partsupp.ps_supplycost * partsupp.ps_availqty) * Float64(0.0001)@1 + ProjectionExec: expr=[CAST(CAST(sum(partsupp.ps_supplycost * partsupp.ps_availqty)@0 AS Float64) * 0.0001 AS Decimal128(38, 15)) as sum(partsupp.ps_supplycost * partsupp.ps_availqty) * Float64(0.0001)] + AggregateExec: mode=Single, gby=[], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[ps_availqty@1, ps_supplycost@2] + ProjectionExec: expr=[n_nationkey@0 as n_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: n_name@1 = ALGERIA + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ALGERIA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ALGERIA AND ALGERIA <= n_name_max@1 END, required_guarantees=[n_name in (ALGERIA)] + ProjectionExec: expr=[ps_availqty@1 as ps_availqty, ps_supplycost@2 as ps_supplycost, s_nationkey@0 as s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@0)], projection=[s_nationkey@1, ps_availqty@3, ps_supplycost@4] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet:0..41093580]]}, projection=[ps_suppkey, ps_availqty, ps_supplycost] + AggregateExec: mode=Single, gby=[ps_partkey@0 as ps_partkey], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] + CoalesceBatchesExec: 
target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@3)], projection=[ps_partkey@1, ps_availqty@2, ps_supplycost@3] + ProjectionExec: expr=[n_nationkey@0 as n_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: n_name@1 = ALGERIA + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ALGERIA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ALGERIA AND ALGERIA <= n_name_max@1 END, required_guarantees=[n_name in (ALGERIA)] + ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_availqty@2 as ps_availqty, ps_supplycost@3 as ps_supplycost, s_nationkey@0 as s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@1)], projection=[s_nationkey@1, ps_partkey@2, ps_availqty@4, ps_supplycost@5] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet:0..41093580]]}, projection=[ps_partkey, ps_suppkey, ps_availqty, ps_supplycost] + diff --git a/testdata/expected-plans/q12.txt b/testdata/expected-plans/q12.txt new file mode 100644 index 0000000..7681fb6 --- /dev/null +++ b/testdata/expected-plans/q12.txt @@ -0,0 +1,44 @@ +DataFusion Logical Plan +======================= + +Sort: lineitem.l_shipmode ASC NULLS LAST + Projection: lineitem.l_shipmode, sum(CASE WHEN orders.o_orderpriority = Utf8("1-URGENT") OR orders.o_orderpriority = Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END) AS high_line_count, sum(CASE WHEN orders.o_orderpriority != Utf8("1-URGENT") AND orders.o_orderpriority != Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END) AS low_line_count + Aggregate: 
groupBy=[[lineitem.l_shipmode]], aggr=[[sum(CASE WHEN orders.o_orderpriority = Utf8("1-URGENT") OR orders.o_orderpriority = Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END), sum(CASE WHEN orders.o_orderpriority != Utf8("1-URGENT") AND orders.o_orderpriority != Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END)]] + Projection: orders.o_orderpriority, lineitem.l_shipmode + Inner Join: orders.o_orderkey = lineitem.l_orderkey + TableScan: orders projection=[o_orderkey, o_orderpriority] + Projection: lineitem.l_orderkey, lineitem.l_shipmode + Filter: (lineitem.l_shipmode = Utf8("FOB") OR lineitem.l_shipmode = Utf8("SHIP")) AND lineitem.l_receiptdate > lineitem.l_commitdate AND lineitem.l_shipdate < lineitem.l_commitdate AND lineitem.l_receiptdate >= Date32("1995-01-01") AND lineitem.l_receiptdate < Date32("1996-01-01") + TableScan: lineitem projection=[l_orderkey, l_shipdate, l_commitdate, l_receiptdate, l_shipmode], partial_filters=[lineitem.l_shipmode = Utf8("FOB") OR lineitem.l_shipmode = Utf8("SHIP"), lineitem.l_receiptdate > lineitem.l_commitdate, lineitem.l_shipdate < lineitem.l_commitdate, lineitem.l_receiptdate >= Date32("1995-01-01"), lineitem.l_receiptdate < Date32("1996-01-01")] + +DataFusion Physical Plan +======================== + +SortExec: expr=[l_shipmode@0 ASC NULLS LAST], preserve_partitioning=[false] + ProjectionExec: expr=[l_shipmode@0 as l_shipmode, sum(CASE WHEN orders.o_orderpriority = Utf8("1-URGENT") OR orders.o_orderpriority = Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END)@1 as high_line_count, sum(CASE WHEN orders.o_orderpriority != Utf8("1-URGENT") AND orders.o_orderpriority != Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END)@2 as low_line_count] + AggregateExec: mode=Single, gby=[l_shipmode@1 as l_shipmode], aggr=[sum(CASE WHEN orders.o_orderpriority = Utf8("1-URGENT") OR orders.o_orderpriority = Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END), sum(CASE WHEN orders.o_orderpriority != Utf8("1-URGENT") AND orders.o_orderpriority != Utf8("2-HIGH") 
THEN Int64(1) ELSE Int64(0) END)] + ProjectionExec: expr=[o_orderpriority@1 as o_orderpriority, l_shipmode@0 as l_shipmode] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(l_orderkey@0, o_orderkey@0)], projection=[l_shipmode@1, o_orderpriority@3] + ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_shipmode@4 as l_shipmode] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: (l_shipmode@4 = FOB OR l_shipmode@4 = SHIP) AND l_receiptdate@3 > l_commitdate@2 AND l_shipdate@1 < l_commitdate@2 AND l_receiptdate@3 >= 1995-01-01 AND l_receiptdate@3 < 1996-01-01 + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_orderkey, l_shipdate, l_commitdate, l_receiptdate, l_shipmode], predicate=(l_shipmode@14 = FOB OR l_shipmode@14 = SHIP) AND l_receiptdate@12 > l_commitdate@11 AND l_shipdate@10 < l_commitdate@11 AND l_receiptdate@12 >= 1995-01-01 AND l_receiptdate@12 < 1996-01-01, pruning_predicate=(CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= FOB AND FOB <= l_shipmode_max@1 END OR CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= SHIP AND SHIP <= l_shipmode_max@1 END) AND CASE WHEN l_receiptdate_null_count@5 = l_receiptdate_row_count@6 THEN false ELSE l_receiptdate_max@4 >= 1995-01-01 END AND CASE WHEN l_receiptdate_null_count@5 = l_receiptdate_row_count@6 THEN false ELSE l_receiptdate_min@7 < 1996-01-01 END, required_guarantees=[l_shipmode in (FOB, SHIP)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54533415]]}, projection=[o_orderkey, o_orderpriority] + +RaySQL Plan +=========== + +Query Stage #0 (1 -> 1): +SortExec: expr=[l_shipmode@0 ASC NULLS LAST], preserve_partitioning=[false] + ProjectionExec: expr=[l_shipmode@0 as l_shipmode, sum(CASE WHEN orders.o_orderpriority = 
Utf8("1-URGENT") OR orders.o_orderpriority = Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END)@1 as high_line_count, sum(CASE WHEN orders.o_orderpriority != Utf8("1-URGENT") AND orders.o_orderpriority != Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END)@2 as low_line_count] + AggregateExec: mode=Single, gby=[l_shipmode@1 as l_shipmode], aggr=[sum(CASE WHEN orders.o_orderpriority = Utf8("1-URGENT") OR orders.o_orderpriority = Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END), sum(CASE WHEN orders.o_orderpriority != Utf8("1-URGENT") AND orders.o_orderpriority != Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END)] + ProjectionExec: expr=[o_orderpriority@1 as o_orderpriority, l_shipmode@0 as l_shipmode] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(l_orderkey@0, o_orderkey@0)], projection=[l_shipmode@1, o_orderpriority@3] + ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_shipmode@4 as l_shipmode] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: (l_shipmode@4 = FOB OR l_shipmode@4 = SHIP) AND l_receiptdate@3 > l_commitdate@2 AND l_shipdate@1 < l_commitdate@2 AND l_receiptdate@3 >= 1995-01-01 AND l_receiptdate@3 < 1996-01-01 + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_orderkey, l_shipdate, l_commitdate, l_receiptdate, l_shipmode], predicate=(l_shipmode@14 = FOB OR l_shipmode@14 = SHIP) AND l_receiptdate@12 > l_commitdate@11 AND l_shipdate@10 < l_commitdate@11 AND l_receiptdate@12 >= 1995-01-01 AND l_receiptdate@12 < 1996-01-01, pruning_predicate=(CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= FOB AND FOB <= l_shipmode_max@1 END OR CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= SHIP AND SHIP <= l_shipmode_max@1 END) AND CASE WHEN l_receiptdate_null_count@5 = l_receiptdate_row_count@6 THEN false ELSE l_receiptdate_max@4 >= 
1995-01-01 END AND CASE WHEN l_receiptdate_null_count@5 = l_receiptdate_row_count@6 THEN false ELSE l_receiptdate_min@7 < 1996-01-01 END, required_guarantees=[l_shipmode in (FOB, SHIP)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54533415]]}, projection=[o_orderkey, o_orderpriority] + diff --git a/testdata/expected-plans/q13.txt b/testdata/expected-plans/q13.txt new file mode 100644 index 0000000..a49b7d3 --- /dev/null +++ b/testdata/expected-plans/q13.txt @@ -0,0 +1,49 @@ +DataFusion Logical Plan +======================= + +Sort: custdist DESC NULLS FIRST, c_orders.c_count DESC NULLS FIRST + Projection: c_orders.c_count, count(*) AS custdist + Aggregate: groupBy=[[c_orders.c_count]], aggr=[[count(Int64(1)) AS count(*)]] + SubqueryAlias: c_orders + Projection: count(orders.o_orderkey) AS c_count + Aggregate: groupBy=[[customer.c_custkey]], aggr=[[count(orders.o_orderkey)]] + Projection: customer.c_custkey, orders.o_orderkey + Left Join: customer.c_custkey = orders.o_custkey + TableScan: customer projection=[c_custkey] + Projection: orders.o_orderkey, orders.o_custkey + Filter: orders.o_comment NOT LIKE Utf8("%express%requests%") + TableScan: orders projection=[o_orderkey, o_custkey, o_comment], partial_filters=[orders.o_comment NOT LIKE Utf8("%express%requests%")] + +DataFusion Physical Plan +======================== + +SortExec: expr=[custdist@1 DESC,c_count@0 DESC], preserve_partitioning=[false] + ProjectionExec: expr=[c_count@0 as c_count, count(*)@1 as custdist] + AggregateExec: mode=Single, gby=[c_count@0 as c_count], aggr=[count(*)] + ProjectionExec: expr=[count(orders.o_orderkey)@1 as c_count] + AggregateExec: mode=Single, gby=[c_custkey@0 as c_custkey], aggr=[count(orders.o_orderkey)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Left, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, o_orderkey@1] + ParquetExec: file_groups={1 group: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13210681]]}, projection=[c_custkey] + ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: o_comment@2 NOT LIKE %express%requests% + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54533415]]}, projection=[o_orderkey, o_custkey, o_comment], predicate=o_comment@8 NOT LIKE %express%requests% + +RaySQL Plan +=========== + +Query Stage #0 (1 -> 1): +SortExec: expr=[custdist@1 DESC,c_count@0 DESC], preserve_partitioning=[false] + ProjectionExec: expr=[c_count@0 as c_count, count(*)@1 as custdist] + AggregateExec: mode=Single, gby=[c_count@0 as c_count], aggr=[count(*)] + ProjectionExec: expr=[count(orders.o_orderkey)@1 as c_count] + AggregateExec: mode=Single, gby=[c_custkey@0 as c_custkey], aggr=[count(orders.o_orderkey)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Left, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, o_orderkey@1] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13210681]]}, projection=[c_custkey] + ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: o_comment@2 NOT LIKE %express%requests% + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54533415]]}, projection=[o_orderkey, o_custkey, o_comment], predicate=o_comment@8 NOT LIKE %express%requests% + diff --git a/testdata/expected-plans/q14.txt b/testdata/expected-plans/q14.txt new file mode 100644 index 0000000..a7de0f0 --- /dev/null +++ b/testdata/expected-plans/q14.txt @@ -0,0 +1,41 @@ +DataFusion Logical Plan +======================= + +Projection: Float64(100) * CAST(sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN 
lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END) AS Float64) / CAST(sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS Float64) AS promo_revenue + Aggregate: groupBy=[[]], aggr=[[sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN __common_expr_1 ELSE Decimal128(Some(0),35,4) END) AS sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END), sum(__common_expr_1) AS sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]] + Projection: lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - lineitem.l_discount) AS __common_expr_1, part.p_type + Inner Join: lineitem.l_partkey = part.p_partkey + Projection: lineitem.l_partkey, lineitem.l_extendedprice, lineitem.l_discount + Filter: lineitem.l_shipdate >= Date32("1995-02-01") AND lineitem.l_shipdate < Date32("1995-03-01") + TableScan: lineitem projection=[l_partkey, l_extendedprice, l_discount, l_shipdate], partial_filters=[lineitem.l_shipdate >= Date32("1995-02-01"), lineitem.l_shipdate < Date32("1995-03-01")] + TableScan: part projection=[p_partkey, p_type] + +DataFusion Physical Plan +======================== + +ProjectionExec: expr=[100 * CAST(sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END)@0 AS Float64) / CAST(sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@1 AS Float64) as promo_revenue] + AggregateExec: mode=Single, gby=[], aggr=[sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] + ProjectionExec: expr=[l_extendedprice@1 * (Some(1),20,0 - l_discount@2) as __common_expr_2, p_type@0 as p_type] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, l_partkey@0)], projection=[p_type@1, l_extendedprice@3, 
l_discount@4] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_type] + ProjectionExec: expr=[l_partkey@0 as l_partkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: l_shipdate@3 >= 1995-02-01 AND l_shipdate@3 < 1995-03-01 + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_partkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1995-02-01 AND l_shipdate@10 < 1995-03-01, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1995-02-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1995-03-01 END, required_guarantees=[] + +RaySQL Plan +=========== + +Query Stage #0 (1 -> 1): +ProjectionExec: expr=[100 * CAST(sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END)@0 AS Float64) / CAST(sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@1 AS Float64) as promo_revenue] + AggregateExec: mode=Single, gby=[], aggr=[sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] + ProjectionExec: expr=[l_extendedprice@1 * (Some(1),20,0 - l_discount@2) as __common_expr_2, p_type@0 as p_type] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, l_partkey@0)], projection=[p_type@1, l_extendedprice@3, l_discount@4] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_type] + ProjectionExec: expr=[l_partkey@0 as l_partkey, l_extendedprice@1 as 
l_extendedprice, l_discount@2 as l_discount] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: l_shipdate@3 >= 1995-02-01 AND l_shipdate@3 < 1995-03-01 + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_partkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1995-02-01 AND l_shipdate@10 < 1995-03-01, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1995-02-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1995-03-01 END, required_guarantees=[] + diff --git a/testdata/expected-plans/q16.txt b/testdata/expected-plans/q16.txt new file mode 100644 index 0000000..d08b84a --- /dev/null +++ b/testdata/expected-plans/q16.txt @@ -0,0 +1,61 @@ +DataFusion Logical Plan +======================= + +Sort: supplier_cnt DESC NULLS FIRST, part.p_brand ASC NULLS LAST, part.p_type ASC NULLS LAST, part.p_size ASC NULLS LAST + Projection: part.p_brand, part.p_type, part.p_size, count(alias1) AS supplier_cnt + Aggregate: groupBy=[[part.p_brand, part.p_type, part.p_size]], aggr=[[count(alias1)]] + Aggregate: groupBy=[[part.p_brand, part.p_type, part.p_size, partsupp.ps_suppkey AS alias1]], aggr=[[]] + LeftAnti Join: partsupp.ps_suppkey = __correlated_sq_1.s_suppkey + Projection: partsupp.ps_suppkey, part.p_brand, part.p_type, part.p_size + Inner Join: partsupp.ps_partkey = part.p_partkey + TableScan: partsupp projection=[ps_partkey, ps_suppkey] + Filter: part.p_brand != Utf8("Brand#14") AND part.p_type NOT LIKE Utf8("SMALL PLATED%") AND part.p_size IN ([Int32(14), Int32(6), Int32(5), Int32(31), Int32(49), Int32(15), Int32(41), Int32(47)]) + TableScan: part projection=[p_partkey, p_brand, p_type, p_size], partial_filters=[part.p_brand != Utf8("Brand#14"), part.p_type NOT LIKE Utf8("SMALL PLATED%"), part.p_size IN ([Int32(14), Int32(6), Int32(5), 
Int32(31), Int32(49), Int32(15), Int32(41), Int32(47)])] + SubqueryAlias: __correlated_sq_1 + Projection: supplier.s_suppkey + Filter: supplier.s_comment LIKE Utf8("%Customer%Complaints%") + TableScan: supplier projection=[s_suppkey, s_comment], partial_filters=[supplier.s_comment LIKE Utf8("%Customer%Complaints%")] + +DataFusion Physical Plan +======================== + +SortExec: expr=[supplier_cnt@3 DESC,p_brand@0 ASC NULLS LAST,p_type@1 ASC NULLS LAST,p_size@2 ASC NULLS LAST], preserve_partitioning=[false] + ProjectionExec: expr=[p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size, count(alias1)@3 as supplier_cnt] + AggregateExec: mode=Single, gby=[p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size], aggr=[count(alias1)] + AggregateExec: mode=Single, gby=[p_brand@1 as p_brand, p_type@2 as p_type, p_size@3 as p_size, ps_suppkey@0 as alias1], aggr=[] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=RightAnti, on=[(s_suppkey@0, ps_suppkey@0)] + ProjectionExec: expr=[s_suppkey@0 as s_suppkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: s_comment@1 LIKE %Customer%Complaints% + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_comment], predicate=s_comment@6 LIKE %Customer%Complaints% + ProjectionExec: expr=[ps_suppkey@3 as ps_suppkey, p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, ps_partkey@0)], projection=[p_brand@1, p_type@2, p_size@3, ps_suppkey@5] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: p_brand@1 != Brand#14 AND p_type@2 NOT LIKE SMALL PLATED% AND Use p_size@3 IN (SET) ([Literal { value: Int32(14) }, Literal { value: Int32(6) }, Literal { value: Int32(5) }, Literal { value: Int32(31) }, Literal { value: Int32(49) }, Literal { value: Int32(15) }, Literal { 
value: Int32(41) }, Literal { value: Int32(47) }]) + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_brand, p_type, p_size], predicate=p_brand@3 != Brand#14 AND p_type@4 NOT LIKE SMALL PLATED% AND Use p_size@5 IN (SET) ([Literal { value: Int32(14) }, Literal { value: Int32(6) }, Literal { value: Int32(5) }, Literal { value: Int32(31) }, Literal { value: Int32(49) }, Literal { value: Int32(15) }, Literal { value: Int32(41) }, Literal { value: Int32(47) }]), pruning_predicate=CASE WHEN p_brand_null_count@2 = p_brand_row_count@3 THEN false ELSE p_brand_min@0 != Brand#14 OR Brand#14 != p_brand_max@1 END AND (CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 14 AND 14 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 6 AND 6 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 5 AND 5 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 31 AND 31 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 49 AND 49 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 15 AND 15 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 41 AND 41 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 47 AND 47 <= p_size_max@5 END), required_guarantees=[p_brand not in (Brand#14), p_size in (47, 49, 5, 14, 41, 6, 15, 31)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet:0..41093580]]}, projection=[ps_partkey, ps_suppkey] + +RaySQL Plan +=========== + +Query Stage #0 (1 -> 1): +SortExec: expr=[supplier_cnt@3 DESC,p_brand@0 ASC NULLS LAST,p_type@1 ASC 
NULLS LAST,p_size@2 ASC NULLS LAST], preserve_partitioning=[false] + ProjectionExec: expr=[p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size, count(alias1)@3 as supplier_cnt] + AggregateExec: mode=Single, gby=[p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size], aggr=[count(alias1)] + AggregateExec: mode=Single, gby=[p_brand@1 as p_brand, p_type@2 as p_type, p_size@3 as p_size, ps_suppkey@0 as alias1], aggr=[] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=RightAnti, on=[(s_suppkey@0, ps_suppkey@0)] + ProjectionExec: expr=[s_suppkey@0 as s_suppkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: s_comment@1 LIKE %Customer%Complaints% + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_comment], predicate=s_comment@6 LIKE %Customer%Complaints% + ProjectionExec: expr=[ps_suppkey@3 as ps_suppkey, p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, ps_partkey@0)], projection=[p_brand@1, p_type@2, p_size@3, ps_suppkey@5] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: p_brand@1 != Brand#14 AND p_type@2 NOT LIKE SMALL PLATED% AND Use p_size@3 IN (SET) ([Literal { value: Int32(14) }, Literal { value: Int32(6) }, Literal { value: Int32(5) }, Literal { value: Int32(31) }, Literal { value: Int32(49) }, Literal { value: Int32(15) }, Literal { value: Int32(41) }, Literal { value: Int32(47) }]) + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_brand, p_type, p_size], predicate=p_brand@3 != Brand#14 AND p_type@4 NOT LIKE SMALL PLATED% AND Use p_size@5 IN (SET) ([Literal { value: Int32(14) }, Literal { value: Int32(6) }, Literal { value: Int32(5) }, Literal { value: Int32(31) }, Literal { value: Int32(49) 
}, Literal { value: Int32(15) }, Literal { value: Int32(41) }, Literal { value: Int32(47) }]), pruning_predicate=CASE WHEN p_brand_null_count@2 = p_brand_row_count@3 THEN false ELSE p_brand_min@0 != Brand#14 OR Brand#14 != p_brand_max@1 END AND (CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 14 AND 14 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 6 AND 6 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 5 AND 5 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 31 AND 31 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 49 AND 49 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 15 AND 15 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 41 AND 41 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 47 AND 47 <= p_size_max@5 END), required_guarantees=[p_brand not in (Brand#14), p_size in (47, 49, 5, 14, 41, 6, 15, 31)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet:0..41093580]]}, projection=[ps_partkey, ps_suppkey] + diff --git a/testdata/expected-plans/q17.txt b/testdata/expected-plans/q17.txt new file mode 100644 index 0000000..327ad98 --- /dev/null +++ b/testdata/expected-plans/q17.txt @@ -0,0 +1,57 @@ +DataFusion Logical Plan +======================= + +Projection: CAST(sum(lineitem.l_extendedprice) AS Float64) / Float64(7) AS avg_yearly + Aggregate: groupBy=[[]], aggr=[[sum(lineitem.l_extendedprice)]] + Projection: lineitem.l_extendedprice + Inner Join: part.p_partkey = __scalar_sq_1.l_partkey Filter: CAST(lineitem.l_quantity AS Decimal128(30, 15)) < __scalar_sq_1.Float64(0.2) * 
avg(lineitem.l_quantity) + Projection: lineitem.l_quantity, lineitem.l_extendedprice, part.p_partkey + Inner Join: lineitem.l_partkey = part.p_partkey + TableScan: lineitem projection=[l_partkey, l_quantity, l_extendedprice] + Projection: part.p_partkey + Filter: part.p_brand = Utf8("Brand#42") AND part.p_container = Utf8("LG BAG") + TableScan: part projection=[p_partkey, p_brand, p_container], partial_filters=[part.p_brand = Utf8("Brand#42"), part.p_container = Utf8("LG BAG")] + SubqueryAlias: __scalar_sq_1 + Projection: CAST(Float64(0.2) * CAST(avg(lineitem.l_quantity) AS Float64) AS Decimal128(30, 15)), lineitem.l_partkey + Aggregate: groupBy=[[lineitem.l_partkey]], aggr=[[avg(lineitem.l_quantity)]] + TableScan: lineitem projection=[l_partkey, l_quantity] + +DataFusion Physical Plan +======================== + +ProjectionExec: expr=[CAST(sum(lineitem.l_extendedprice)@0 AS Float64) / 7 as avg_yearly] + AggregateExec: mode=Single, gby=[], aggr=[sum(lineitem.l_extendedprice)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@2, l_partkey@1)], filter=CAST(l_quantity@0 AS Decimal128(30, 15)) < Float64(0.2) * avg(lineitem.l_quantity)@1, projection=[l_extendedprice@1] + ProjectionExec: expr=[l_quantity@1 as l_quantity, l_extendedprice@2 as l_extendedprice, p_partkey@0 as p_partkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, l_partkey@0)], projection=[p_partkey@0, l_quantity@2, l_extendedprice@3] + ProjectionExec: expr=[p_partkey@0 as p_partkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: p_brand@1 = Brand#42 AND p_container@2 = LG BAG + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_brand, p_container], predicate=p_brand@3 = Brand#42 AND p_container@6 = LG BAG, pruning_predicate=CASE WHEN p_brand_null_count@2 = p_brand_row_count@3 THEN 
false ELSE p_brand_min@0 <= Brand#42 AND Brand#42 <= p_brand_max@1 END AND CASE WHEN p_container_null_count@6 = p_container_row_count@7 THEN false ELSE p_container_min@4 <= LG BAG AND LG BAG <= p_container_max@5 END, required_guarantees=[p_brand in (Brand#42), p_container in (LG BAG)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_partkey, l_quantity, l_extendedprice] + ProjectionExec: expr=[CAST(0.2 * CAST(avg(lineitem.l_quantity)@1 AS Float64) AS Decimal128(30, 15)) as Float64(0.2) * avg(lineitem.l_quantity), l_partkey@0 as l_partkey] + AggregateExec: mode=Single, gby=[l_partkey@0 as l_partkey], aggr=[avg(lineitem.l_quantity)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_partkey, l_quantity] + +RaySQL Plan +=========== + +Query Stage #0 (1 -> 1): +ProjectionExec: expr=[CAST(sum(lineitem.l_extendedprice)@0 AS Float64) / 7 as avg_yearly] + AggregateExec: mode=Single, gby=[], aggr=[sum(lineitem.l_extendedprice)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@2, l_partkey@1)], filter=CAST(l_quantity@0 AS Decimal128(30, 15)) < Float64(0.2) * avg(lineitem.l_quantity)@1, projection=[l_extendedprice@1] + ProjectionExec: expr=[l_quantity@1 as l_quantity, l_extendedprice@2 as l_extendedprice, p_partkey@0 as p_partkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, l_partkey@0)], projection=[p_partkey@0, l_quantity@2, l_extendedprice@3] + ProjectionExec: expr=[p_partkey@0 as p_partkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: p_brand@1 = Brand#42 AND p_container@2 = LG BAG + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_brand, p_container], 
predicate=p_brand@3 = Brand#42 AND p_container@6 = LG BAG, pruning_predicate=CASE WHEN p_brand_null_count@2 = p_brand_row_count@3 THEN false ELSE p_brand_min@0 <= Brand#42 AND Brand#42 <= p_brand_max@1 END AND CASE WHEN p_container_null_count@6 = p_container_row_count@7 THEN false ELSE p_container_min@4 <= LG BAG AND LG BAG <= p_container_max@5 END, required_guarantees=[p_brand in (Brand#42), p_container in (LG BAG)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_partkey, l_quantity, l_extendedprice] + ProjectionExec: expr=[CAST(0.2 * CAST(avg(lineitem.l_quantity)@1 AS Float64) AS Decimal128(30, 15)) as Float64(0.2) * avg(lineitem.l_quantity), l_partkey@0 as l_partkey] + AggregateExec: mode=Single, gby=[l_partkey@0 as l_partkey], aggr=[avg(lineitem.l_quantity)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_partkey, l_quantity] + diff --git a/testdata/expected-plans/q18.txt b/testdata/expected-plans/q18.txt new file mode 100644 index 0000000..0fb9045 --- /dev/null +++ b/testdata/expected-plans/q18.txt @@ -0,0 +1,61 @@ +DataFusion Logical Plan +======================= + +Limit: skip=0, fetch=100 + Sort: orders.o_totalprice DESC NULLS FIRST, orders.o_orderdate ASC NULLS LAST, fetch=100 + Aggregate: groupBy=[[customer.c_name, customer.c_custkey, orders.o_orderkey, orders.o_orderdate, orders.o_totalprice]], aggr=[[sum(lineitem.l_quantity)]] + LeftSemi Join: orders.o_orderkey = __correlated_sq_1.l_orderkey + Projection: customer.c_custkey, customer.c_name, orders.o_orderkey, orders.o_totalprice, orders.o_orderdate, lineitem.l_quantity + Inner Join: orders.o_orderkey = lineitem.l_orderkey + Projection: customer.c_custkey, customer.c_name, orders.o_orderkey, orders.o_totalprice, orders.o_orderdate + Inner Join: customer.c_custkey = orders.o_custkey + TableScan: customer 
projection=[c_custkey, c_name] + TableScan: orders projection=[o_orderkey, o_custkey, o_totalprice, o_orderdate] + TableScan: lineitem projection=[l_orderkey, l_quantity] + SubqueryAlias: __correlated_sq_1 + Projection: lineitem.l_orderkey + Filter: sum(lineitem.l_quantity) > Decimal128(Some(31300),21,2) + Aggregate: groupBy=[[lineitem.l_orderkey]], aggr=[[sum(lineitem.l_quantity)]] + TableScan: lineitem projection=[l_orderkey, l_quantity] + +DataFusion Physical Plan +======================== + +SortExec: TopK(fetch=100), expr=[o_totalprice@4 DESC,o_orderdate@3 ASC NULLS LAST], preserve_partitioning=[false] + AggregateExec: mode=Single, gby=[c_name@1 as c_name, c_custkey@0 as c_custkey, o_orderkey@2 as o_orderkey, o_orderdate@4 as o_orderdate, o_totalprice@3 as o_totalprice], aggr=[sum(lineitem.l_quantity)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=RightSemi, on=[(l_orderkey@0, o_orderkey@2)] + ProjectionExec: expr=[l_orderkey@0 as l_orderkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: sum(lineitem.l_quantity)@1 > Some(31300),21,2 + AggregateExec: mode=Single, gby=[l_orderkey@0 as l_orderkey], aggr=[sum(lineitem.l_quantity)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_orderkey, l_quantity] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(o_orderkey@2, l_orderkey@0)], projection=[c_custkey@0, c_name@1, o_orderkey@2, o_totalprice@3, o_orderdate@4, l_quantity@6] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, c_name@1, o_orderkey@2, o_totalprice@4, o_orderdate@5] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13210681]]}, projection=[c_custkey, c_name] + ParquetExec: file_groups={1 group: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54533415]]}, projection=[o_orderkey, o_custkey, o_totalprice, o_orderdate] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_orderkey, l_quantity] + +RaySQL Plan +=========== + +Query Stage #0 (1 -> 1): +SortExec: TopK(fetch=100), expr=[o_totalprice@4 DESC,o_orderdate@3 ASC NULLS LAST], preserve_partitioning=[false] + AggregateExec: mode=Single, gby=[c_name@1 as c_name, c_custkey@0 as c_custkey, o_orderkey@2 as o_orderkey, o_orderdate@4 as o_orderdate, o_totalprice@3 as o_totalprice], aggr=[sum(lineitem.l_quantity)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=RightSemi, on=[(l_orderkey@0, o_orderkey@2)] + ProjectionExec: expr=[l_orderkey@0 as l_orderkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: sum(lineitem.l_quantity)@1 > Some(31300),21,2 + AggregateExec: mode=Single, gby=[l_orderkey@0 as l_orderkey], aggr=[sum(lineitem.l_quantity)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_orderkey, l_quantity] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(o_orderkey@2, l_orderkey@0)], projection=[c_custkey@0, c_name@1, o_orderkey@2, o_totalprice@3, o_orderdate@4, l_quantity@6] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, c_name@1, o_orderkey@2, o_totalprice@4, o_orderdate@5] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13210681]]}, projection=[c_custkey, c_name] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54533415]]}, projection=[o_orderkey, o_custkey, o_totalprice, 
o_orderdate] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_orderkey, l_quantity] + diff --git a/testdata/expected-plans/q19.txt b/testdata/expected-plans/q19.txt new file mode 100644 index 0000000..c73edc7 --- /dev/null +++ b/testdata/expected-plans/q19.txt @@ -0,0 +1,44 @@ +DataFusion Logical Plan +======================= + +Projection: sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS revenue + Aggregate: groupBy=[[]], aggr=[[sum(lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - lineitem.l_discount)) AS sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]] + Projection: lineitem.l_extendedprice, lineitem.l_discount + Inner Join: lineitem.l_partkey = part.p_partkey Filter: part.p_brand = Utf8("Brand#21") AND part.p_container IN ([Utf8("SM CASE"), Utf8("SM BOX"), Utf8("SM PACK"), Utf8("SM PKG")]) AND lineitem.l_quantity >= Decimal128(Some(800),11,2) AND lineitem.l_quantity <= Decimal128(Some(1800),11,2) AND part.p_size <= Int32(5) OR part.p_brand = Utf8("Brand#13") AND part.p_container IN ([Utf8("MED BAG"), Utf8("MED BOX"), Utf8("MED PKG"), Utf8("MED PACK")]) AND lineitem.l_quantity >= Decimal128(Some(2000),11,2) AND lineitem.l_quantity <= Decimal128(Some(3000),11,2) AND part.p_size <= Int32(10) OR part.p_brand = Utf8("Brand#52") AND part.p_container IN ([Utf8("LG CASE"), Utf8("LG BOX"), Utf8("LG PACK"), Utf8("LG PKG")]) AND lineitem.l_quantity >= Decimal128(Some(3000),11,2) AND lineitem.l_quantity <= Decimal128(Some(4000),11,2) AND part.p_size <= Int32(15) + Projection: lineitem.l_partkey, lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount + Filter: (lineitem.l_quantity >= Decimal128(Some(800),11,2) AND lineitem.l_quantity <= Decimal128(Some(1800),11,2) OR lineitem.l_quantity >= Decimal128(Some(2000),11,2) AND lineitem.l_quantity <= Decimal128(Some(3000),11,2) OR lineitem.l_quantity >= Decimal128(Some(3000),11,2) AND 
lineitem.l_quantity <= Decimal128(Some(4000),11,2)) AND (lineitem.l_shipmode = Utf8("AIR") OR lineitem.l_shipmode = Utf8("AIR REG")) AND lineitem.l_shipinstruct = Utf8("DELIVER IN PERSON") + TableScan: lineitem projection=[l_partkey, l_quantity, l_extendedprice, l_discount, l_shipinstruct, l_shipmode], partial_filters=[lineitem.l_shipmode = Utf8("AIR") OR lineitem.l_shipmode = Utf8("AIR REG"), lineitem.l_shipinstruct = Utf8("DELIVER IN PERSON"), lineitem.l_quantity >= Decimal128(Some(800),11,2) AND lineitem.l_quantity <= Decimal128(Some(1800),11,2) OR lineitem.l_quantity >= Decimal128(Some(2000),11,2) AND lineitem.l_quantity <= Decimal128(Some(3000),11,2) OR lineitem.l_quantity >= Decimal128(Some(3000),11,2) AND lineitem.l_quantity <= Decimal128(Some(4000),11,2)] + Filter: (part.p_brand = Utf8("Brand#21") AND part.p_container IN ([Utf8("SM CASE"), Utf8("SM BOX"), Utf8("SM PACK"), Utf8("SM PKG")]) AND part.p_size <= Int32(5) OR part.p_brand = Utf8("Brand#13") AND part.p_container IN ([Utf8("MED BAG"), Utf8("MED BOX"), Utf8("MED PKG"), Utf8("MED PACK")]) AND part.p_size <= Int32(10) OR part.p_brand = Utf8("Brand#52") AND part.p_container IN ([Utf8("LG CASE"), Utf8("LG BOX"), Utf8("LG PACK"), Utf8("LG PKG")]) AND part.p_size <= Int32(15)) AND part.p_size >= Int32(1) + TableScan: part projection=[p_partkey, p_brand, p_size, p_container], partial_filters=[part.p_size >= Int32(1), part.p_brand = Utf8("Brand#21") AND part.p_container IN ([Utf8("SM CASE"), Utf8("SM BOX"), Utf8("SM PACK"), Utf8("SM PKG")]) AND part.p_size <= Int32(5) OR part.p_brand = Utf8("Brand#13") AND part.p_container IN ([Utf8("MED BAG"), Utf8("MED BOX"), Utf8("MED PKG"), Utf8("MED PACK")]) AND part.p_size <= Int32(10) OR part.p_brand = Utf8("Brand#52") AND part.p_container IN ([Utf8("LG CASE"), Utf8("LG BOX"), Utf8("LG PACK"), Utf8("LG PKG")]) AND part.p_size <= Int32(15)] + +DataFusion Physical Plan +======================== + +ProjectionExec: expr=[sum(lineitem.l_extendedprice * Int64(1) - 
lineitem.l_discount)@0 as revenue] + AggregateExec: mode=Single, gby=[], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, l_partkey@0)], filter=p_brand@1 = Brand#21 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("SM CASE") }, Literal { value: Utf8("SM BOX") }, Literal { value: Utf8("SM PACK") }, Literal { value: Utf8("SM PKG") }]) AND l_quantity@0 >= Some(800),11,2 AND l_quantity@0 <= Some(1800),11,2 AND p_size@2 <= 5 OR p_brand@1 = Brand#13 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("MED BAG") }, Literal { value: Utf8("MED BOX") }, Literal { value: Utf8("MED PKG") }, Literal { value: Utf8("MED PACK") }]) AND l_quantity@0 >= Some(2000),11,2 AND l_quantity@0 <= Some(3000),11,2 AND p_size@2 <= 10 OR p_brand@1 = Brand#52 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("LG CASE") }, Literal { value: Utf8("LG BOX") }, Literal { value: Utf8("LG PACK") }, Literal { value: Utf8("LG PKG") }]) AND l_quantity@0 >= Some(3000),11,2 AND l_quantity@0 <= Some(4000),11,2 AND p_size@2 <= 15, projection=[l_extendedprice@6, l_discount@7] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: (p_brand@1 = Brand#21 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("SM CASE") }, Literal { value: Utf8("SM BOX") }, Literal { value: Utf8("SM PACK") }, Literal { value: Utf8("SM PKG") }]) AND p_size@2 <= 5 OR p_brand@1 = Brand#13 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("MED BAG") }, Literal { value: Utf8("MED BOX") }, Literal { value: Utf8("MED PKG") }, Literal { value: Utf8("MED PACK") }]) AND p_size@2 <= 10 OR p_brand@1 = Brand#52 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("LG CASE") }, Literal { value: Utf8("LG BOX") }, Literal { value: Utf8("LG PACK") }, Literal { value: Utf8("LG PKG") }]) AND p_size@2 <= 15) AND p_size@2 >= 1 + ParquetExec: file_groups={1 group: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_brand, p_size, p_container], predicate=p_size@5 >= 1 AND (p_brand@3 = Brand#21 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("SM CASE") }, Literal { value: Utf8("SM BOX") }, Literal { value: Utf8("SM PACK") }, Literal { value: Utf8("SM PKG") }]) AND p_size@5 <= 5 OR p_brand@3 = Brand#13 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("MED BAG") }, Literal { value: Utf8("MED BOX") }, Literal { value: Utf8("MED PKG") }, Literal { value: Utf8("MED PACK") }]) AND p_size@5 <= 10 OR p_brand@3 = Brand#52 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("LG CASE") }, Literal { value: Utf8("LG BOX") }, Literal { value: Utf8("LG PACK") }, Literal { value: Utf8("LG PKG") }]) AND p_size@5 <= 15), pruning_predicate=CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_max@0 >= 1 END AND (CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#21 AND Brand#21 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM CASE AND SM CASE <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM BOX AND SM BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM PACK AND SM PACK <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM PKG AND SM PKG <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 5 END OR CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#13 AND Brand#13 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED BAG AND MED BAG <= 
p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED BOX AND MED BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED PKG AND MED PKG <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED PACK AND MED PACK <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 10 END OR CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#52 AND Brand#52 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG CASE AND LG CASE <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG BOX AND LG BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG PACK AND LG PACK <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG PKG AND LG PKG <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 15 END), required_guarantees=[] + ProjectionExec: expr=[l_partkey@0 as l_partkey, l_quantity@1 as l_quantity, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: (l_quantity@1 >= Some(800),11,2 AND l_quantity@1 <= Some(1800),11,2 OR l_quantity@1 >= Some(2000),11,2 AND l_quantity@1 <= Some(3000),11,2 OR l_quantity@1 >= Some(3000),11,2 AND l_quantity@1 <= Some(4000),11,2) AND (l_shipmode@5 = AIR OR l_shipmode@5 = AIR REG) AND l_shipinstruct@4 = DELIVER IN PERSON + ParquetExec: file_groups={1 group: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_partkey, l_quantity, l_extendedprice, l_discount, l_shipinstruct, l_shipmode], predicate=(l_shipmode@14 = AIR OR l_shipmode@14 = AIR REG) AND l_shipinstruct@13 = DELIVER IN PERSON AND (l_quantity@4 >= Some(800),11,2 AND l_quantity@4 <= Some(1800),11,2 OR l_quantity@4 >= Some(2000),11,2 AND l_quantity@4 <= Some(3000),11,2 OR l_quantity@4 >= Some(3000),11,2 AND l_quantity@4 <= Some(4000),11,2), pruning_predicate=(CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= AIR AND AIR <= l_shipmode_max@1 END OR CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= AIR REG AND AIR REG <= l_shipmode_max@1 END) AND CASE WHEN l_shipinstruct_null_count@6 = l_shipinstruct_row_count@7 THEN false ELSE l_shipinstruct_min@4 <= DELIVER IN PERSON AND DELIVER IN PERSON <= l_shipinstruct_max@5 END AND (CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(800),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(1800),11,2 END OR CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(2000),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(3000),11,2 END OR CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(3000),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(4000),11,2 END), required_guarantees=[l_shipmode in (AIR REG, AIR), l_shipinstruct in (DELIVER IN PERSON)] + +RaySQL Plan +=========== + +Query Stage #0 (1 -> 1): +ProjectionExec: expr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@0 as revenue] + AggregateExec: mode=Single, gby=[], 
aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, l_partkey@0)], filter=p_brand@1 = Brand#21 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("SM CASE") }, Literal { value: Utf8("SM BOX") }, Literal { value: Utf8("SM PACK") }, Literal { value: Utf8("SM PKG") }]) AND l_quantity@0 >= Some(800),11,2 AND l_quantity@0 <= Some(1800),11,2 AND p_size@2 <= 5 OR p_brand@1 = Brand#13 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("MED BAG") }, Literal { value: Utf8("MED BOX") }, Literal { value: Utf8("MED PKG") }, Literal { value: Utf8("MED PACK") }]) AND l_quantity@0 >= Some(2000),11,2 AND l_quantity@0 <= Some(3000),11,2 AND p_size@2 <= 10 OR p_brand@1 = Brand#52 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("LG CASE") }, Literal { value: Utf8("LG BOX") }, Literal { value: Utf8("LG PACK") }, Literal { value: Utf8("LG PKG") }]) AND l_quantity@0 >= Some(3000),11,2 AND l_quantity@0 <= Some(4000),11,2 AND p_size@2 <= 15, projection=[l_extendedprice@6, l_discount@7] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: (p_brand@1 = Brand#21 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("SM CASE") }, Literal { value: Utf8("SM BOX") }, Literal { value: Utf8("SM PACK") }, Literal { value: Utf8("SM PKG") }]) AND p_size@2 <= 5 OR p_brand@1 = Brand#13 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("MED BAG") }, Literal { value: Utf8("MED BOX") }, Literal { value: Utf8("MED PKG") }, Literal { value: Utf8("MED PACK") }]) AND p_size@2 <= 10 OR p_brand@1 = Brand#52 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("LG CASE") }, Literal { value: Utf8("LG BOX") }, Literal { value: Utf8("LG PACK") }, Literal { value: Utf8("LG PKG") }]) AND p_size@2 <= 15) AND p_size@2 >= 1 + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_brand, 
p_size, p_container], predicate=p_size@5 >= 1 AND (p_brand@3 = Brand#21 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("SM CASE") }, Literal { value: Utf8("SM BOX") }, Literal { value: Utf8("SM PACK") }, Literal { value: Utf8("SM PKG") }]) AND p_size@5 <= 5 OR p_brand@3 = Brand#13 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("MED BAG") }, Literal { value: Utf8("MED BOX") }, Literal { value: Utf8("MED PKG") }, Literal { value: Utf8("MED PACK") }]) AND p_size@5 <= 10 OR p_brand@3 = Brand#52 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("LG CASE") }, Literal { value: Utf8("LG BOX") }, Literal { value: Utf8("LG PACK") }, Literal { value: Utf8("LG PKG") }]) AND p_size@5 <= 15), pruning_predicate=CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_max@0 >= 1 END AND (CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#21 AND Brand#21 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM CASE AND SM CASE <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM BOX AND SM BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM PACK AND SM PACK <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM PKG AND SM PKG <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 5 END OR CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#13 AND Brand#13 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED BAG AND MED BAG <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE 
p_container_min@7 <= MED BOX AND MED BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED PKG AND MED PKG <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED PACK AND MED PACK <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 10 END OR CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#52 AND Brand#52 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG CASE AND LG CASE <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG BOX AND LG BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG PACK AND LG PACK <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG PKG AND LG PKG <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 15 END), required_guarantees=[] + ProjectionExec: expr=[l_partkey@0 as l_partkey, l_quantity@1 as l_quantity, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: (l_quantity@1 >= Some(800),11,2 AND l_quantity@1 <= Some(1800),11,2 OR l_quantity@1 >= Some(2000),11,2 AND l_quantity@1 <= Some(3000),11,2 OR l_quantity@1 >= Some(3000),11,2 AND l_quantity@1 <= Some(4000),11,2) AND (l_shipmode@5 = AIR OR l_shipmode@5 = AIR REG) AND l_shipinstruct@4 = DELIVER IN PERSON + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_partkey, l_quantity, l_extendedprice, l_discount, 
l_shipinstruct, l_shipmode], predicate=(l_shipmode@14 = AIR OR l_shipmode@14 = AIR REG) AND l_shipinstruct@13 = DELIVER IN PERSON AND (l_quantity@4 >= Some(800),11,2 AND l_quantity@4 <= Some(1800),11,2 OR l_quantity@4 >= Some(2000),11,2 AND l_quantity@4 <= Some(3000),11,2 OR l_quantity@4 >= Some(3000),11,2 AND l_quantity@4 <= Some(4000),11,2), pruning_predicate=(CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= AIR AND AIR <= l_shipmode_max@1 END OR CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= AIR REG AND AIR REG <= l_shipmode_max@1 END) AND CASE WHEN l_shipinstruct_null_count@6 = l_shipinstruct_row_count@7 THEN false ELSE l_shipinstruct_min@4 <= DELIVER IN PERSON AND DELIVER IN PERSON <= l_shipinstruct_max@5 END AND (CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(800),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(1800),11,2 END OR CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(2000),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(3000),11,2 END OR CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(3000),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(4000),11,2 END), required_guarantees=[l_shipmode in (AIR REG, AIR), l_shipinstruct in (DELIVER IN PERSON)] + diff --git a/testdata/expected-plans/q2.txt b/testdata/expected-plans/q2.txt new file mode 100644 index 0000000..f22938c --- /dev/null +++ b/testdata/expected-plans/q2.txt @@ -0,0 +1,133 @@ +DataFusion Logical Plan +======================= + +Limit: skip=0, fetch=100 + Sort: supplier.s_acctbal DESC NULLS FIRST, nation.n_name ASC NULLS LAST, supplier.s_name ASC 
NULLS LAST, part.p_partkey ASC NULLS LAST, fetch=100 + Projection: supplier.s_acctbal, supplier.s_name, nation.n_name, part.p_partkey, part.p_mfgr, supplier.s_address, supplier.s_phone, supplier.s_comment + Inner Join: part.p_partkey = __scalar_sq_1.ps_partkey, partsupp.ps_supplycost = __scalar_sq_1.min(partsupp.ps_supplycost) + Projection: part.p_partkey, part.p_mfgr, supplier.s_name, supplier.s_address, supplier.s_phone, supplier.s_acctbal, supplier.s_comment, partsupp.ps_supplycost, nation.n_name + Inner Join: nation.n_regionkey = region.r_regionkey + Projection: part.p_partkey, part.p_mfgr, supplier.s_name, supplier.s_address, supplier.s_phone, supplier.s_acctbal, supplier.s_comment, partsupp.ps_supplycost, nation.n_name, nation.n_regionkey + Inner Join: supplier.s_nationkey = nation.n_nationkey + Projection: part.p_partkey, part.p_mfgr, supplier.s_name, supplier.s_address, supplier.s_nationkey, supplier.s_phone, supplier.s_acctbal, supplier.s_comment, partsupp.ps_supplycost + Inner Join: partsupp.ps_suppkey = supplier.s_suppkey + Projection: part.p_partkey, part.p_mfgr, partsupp.ps_suppkey, partsupp.ps_supplycost + Inner Join: part.p_partkey = partsupp.ps_partkey + Projection: part.p_partkey, part.p_mfgr + Filter: part.p_size = Int32(48) AND part.p_type LIKE Utf8("%TIN") + TableScan: part projection=[p_partkey, p_mfgr, p_type, p_size], partial_filters=[part.p_size = Int32(48), part.p_type LIKE Utf8("%TIN")] + TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_supplycost] + TableScan: supplier projection=[s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment] + TableScan: nation projection=[n_nationkey, n_name, n_regionkey] + Projection: region.r_regionkey + Filter: region.r_name = Utf8("ASIA") + TableScan: region projection=[r_regionkey, r_name], partial_filters=[region.r_name = Utf8("ASIA")] + SubqueryAlias: __scalar_sq_1 + Projection: min(partsupp.ps_supplycost), partsupp.ps_partkey + Aggregate: groupBy=[[partsupp.ps_partkey]], 
aggr=[[min(partsupp.ps_supplycost)]] + Projection: partsupp.ps_partkey, partsupp.ps_supplycost + Inner Join: nation.n_regionkey = region.r_regionkey + Projection: partsupp.ps_partkey, partsupp.ps_supplycost, nation.n_regionkey + Inner Join: supplier.s_nationkey = nation.n_nationkey + Projection: partsupp.ps_partkey, partsupp.ps_supplycost, supplier.s_nationkey + Inner Join: partsupp.ps_suppkey = supplier.s_suppkey + TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_supplycost] + TableScan: supplier projection=[s_suppkey, s_nationkey] + TableScan: nation projection=[n_nationkey, n_regionkey] + Projection: region.r_regionkey + Filter: region.r_name = Utf8("ASIA") + TableScan: region projection=[r_regionkey, r_name], partial_filters=[region.r_name = Utf8("ASIA")] + +DataFusion Physical Plan +======================== + +SortExec: TopK(fetch=100), expr=[s_acctbal@0 DESC,n_name@2 ASC NULLS LAST,s_name@1 ASC NULLS LAST,p_partkey@3 ASC NULLS LAST], preserve_partitioning=[false] + ProjectionExec: expr=[s_acctbal@5 as s_acctbal, s_name@2 as s_name, n_name@7 as n_name, p_partkey@0 as p_partkey, p_mfgr@1 as p_mfgr, s_address@3 as s_address, s_phone@4 as s_phone, s_comment@6 as s_comment] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, ps_partkey@1), (ps_supplycost@7, min(partsupp.ps_supplycost)@0)], projection=[p_partkey@0, p_mfgr@1, s_name@2, s_address@3, s_phone@4, s_acctbal@5, s_comment@6, n_name@8] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(r_regionkey@0, n_regionkey@9)], projection=[p_partkey@1, p_mfgr@2, s_name@3, s_address@4, s_phone@5, s_acctbal@6, s_comment@7, ps_supplycost@8, n_name@9] + ProjectionExec: expr=[r_regionkey@0 as r_regionkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: r_name@1 = ASIA + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet]]}, 
projection=[r_regionkey, r_name], predicate=r_name@1 = ASIA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= ASIA AND ASIA <= r_name_max@1 END, required_guarantees=[r_name in (ASIA)] + ProjectionExec: expr=[p_partkey@2 as p_partkey, p_mfgr@3 as p_mfgr, s_name@4 as s_name, s_address@5 as s_address, s_phone@6 as s_phone, s_acctbal@7 as s_acctbal, s_comment@8 as s_comment, ps_supplycost@9 as ps_supplycost, n_name@0 as n_name, n_regionkey@1 as n_regionkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@4)], projection=[n_name@1, n_regionkey@2, p_partkey@3, p_mfgr@4, s_name@5, s_address@6, s_phone@8, s_acctbal@9, s_comment@10, ps_supplycost@11] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name, n_regionkey] + ProjectionExec: expr=[p_partkey@6 as p_partkey, p_mfgr@7 as p_mfgr, s_name@0 as s_name, s_address@1 as s_address, s_nationkey@2 as s_nationkey, s_phone@3 as s_phone, s_acctbal@4 as s_acctbal, s_comment@5 as s_comment, ps_supplycost@8 as ps_supplycost] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@2)], projection=[s_name@1, s_address@2, s_nationkey@3, s_phone@4, s_acctbal@5, s_comment@6, p_partkey@7, p_mfgr@8, ps_supplycost@10] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, ps_partkey@0)], projection=[p_partkey@0, p_mfgr@1, ps_suppkey@3, ps_supplycost@4] + ProjectionExec: expr=[p_partkey@0 as p_partkey, p_mfgr@1 as p_mfgr] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: p_size@3 = 48 AND 
p_type@2 LIKE %TIN + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_mfgr, p_type, p_size], predicate=p_size@5 = 48 AND p_type@4 LIKE %TIN, pruning_predicate=CASE WHEN p_size_null_count@2 = p_size_row_count@3 THEN false ELSE p_size_min@0 <= 48 AND 48 <= p_size_max@1 END, required_guarantees=[p_size in (48)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet:0..41093580]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] + ProjectionExec: expr=[min(partsupp.ps_supplycost)@1 as min(partsupp.ps_supplycost), ps_partkey@0 as ps_partkey] + AggregateExec: mode=Single, gby=[ps_partkey@0 as ps_partkey], aggr=[min(partsupp.ps_supplycost)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(r_regionkey@0, n_regionkey@2)], projection=[ps_partkey@1, ps_supplycost@2] + ProjectionExec: expr=[r_regionkey@0 as r_regionkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: r_name@1 = ASIA + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = ASIA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= ASIA AND ASIA <= r_name_max@1 END, required_guarantees=[r_name in (ASIA)] + ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_supplycost@2 as ps_supplycost, n_regionkey@0 as n_regionkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[n_regionkey@1, ps_partkey@2, ps_supplycost@3] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_regionkey] + ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_supplycost@2 as ps_supplycost, s_nationkey@0 as 
s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@1)], projection=[s_nationkey@1, ps_partkey@2, ps_supplycost@4] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet:0..41093580]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] + +RaySQL Plan +=========== + +Query Stage #0 (1 -> 1): +SortExec: TopK(fetch=100), expr=[s_acctbal@0 DESC,n_name@2 ASC NULLS LAST,s_name@1 ASC NULLS LAST,p_partkey@3 ASC NULLS LAST], preserve_partitioning=[false] + ProjectionExec: expr=[s_acctbal@5 as s_acctbal, s_name@2 as s_name, n_name@7 as n_name, p_partkey@0 as p_partkey, p_mfgr@1 as p_mfgr, s_address@3 as s_address, s_phone@4 as s_phone, s_comment@6 as s_comment] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, ps_partkey@1), (ps_supplycost@7, min(partsupp.ps_supplycost)@0)], projection=[p_partkey@0, p_mfgr@1, s_name@2, s_address@3, s_phone@4, s_acctbal@5, s_comment@6, n_name@8] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(r_regionkey@0, n_regionkey@9)], projection=[p_partkey@1, p_mfgr@2, s_name@3, s_address@4, s_phone@5, s_acctbal@6, s_comment@7, ps_supplycost@8, n_name@9] + ProjectionExec: expr=[r_regionkey@0 as r_regionkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: r_name@1 = ASIA + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = ASIA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= ASIA AND ASIA <= r_name_max@1 END, required_guarantees=[r_name in (ASIA)] + ProjectionExec: 
expr=[p_partkey@2 as p_partkey, p_mfgr@3 as p_mfgr, s_name@4 as s_name, s_address@5 as s_address, s_phone@6 as s_phone, s_acctbal@7 as s_acctbal, s_comment@8 as s_comment, ps_supplycost@9 as ps_supplycost, n_name@0 as n_name, n_regionkey@1 as n_regionkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@4)], projection=[n_name@1, n_regionkey@2, p_partkey@3, p_mfgr@4, s_name@5, s_address@6, s_phone@8, s_acctbal@9, s_comment@10, ps_supplycost@11] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name, n_regionkey] + ProjectionExec: expr=[p_partkey@6 as p_partkey, p_mfgr@7 as p_mfgr, s_name@0 as s_name, s_address@1 as s_address, s_nationkey@2 as s_nationkey, s_phone@3 as s_phone, s_acctbal@4 as s_acctbal, s_comment@5 as s_comment, ps_supplycost@8 as ps_supplycost] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@2)], projection=[s_name@1, s_address@2, s_nationkey@3, s_phone@4, s_acctbal@5, s_comment@6, p_partkey@7, p_mfgr@8, ps_supplycost@10] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, ps_partkey@0)], projection=[p_partkey@0, p_mfgr@1, ps_suppkey@3, ps_supplycost@4] + ProjectionExec: expr=[p_partkey@0 as p_partkey, p_mfgr@1 as p_mfgr] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: p_size@3 = 48 AND p_type@2 LIKE %TIN + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_mfgr, p_type, p_size], predicate=p_size@5 = 48 AND p_type@4 LIKE %TIN, pruning_predicate=CASE WHEN 
p_size_null_count@2 = p_size_row_count@3 THEN false ELSE p_size_min@0 <= 48 AND 48 <= p_size_max@1 END, required_guarantees=[p_size in (48)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet:0..41093580]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] + ProjectionExec: expr=[min(partsupp.ps_supplycost)@1 as min(partsupp.ps_supplycost), ps_partkey@0 as ps_partkey] + AggregateExec: mode=Single, gby=[ps_partkey@0 as ps_partkey], aggr=[min(partsupp.ps_supplycost)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(r_regionkey@0, n_regionkey@2)], projection=[ps_partkey@1, ps_supplycost@2] + ProjectionExec: expr=[r_regionkey@0 as r_regionkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: r_name@1 = ASIA + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = ASIA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= ASIA AND ASIA <= r_name_max@1 END, required_guarantees=[r_name in (ASIA)] + ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_supplycost@2 as ps_supplycost, n_regionkey@0 as n_regionkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[n_regionkey@1, ps_partkey@2, ps_supplycost@3] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_regionkey] + ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_supplycost@2 as ps_supplycost, s_nationkey@0 as s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@1)], projection=[s_nationkey@1, ps_partkey@2, ps_supplycost@4] + ParquetExec: file_groups={1 group: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet:0..41093580]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] + diff --git a/testdata/expected-plans/q20.txt b/testdata/expected-plans/q20.txt new file mode 100644 index 0000000..832ab30 --- /dev/null +++ b/testdata/expected-plans/q20.txt @@ -0,0 +1,87 @@ +DataFusion Logical Plan +======================= + +Sort: supplier.s_name ASC NULLS LAST + Projection: supplier.s_name, supplier.s_address + LeftSemi Join: supplier.s_suppkey = __correlated_sq_1.ps_suppkey + Projection: supplier.s_suppkey, supplier.s_name, supplier.s_address + Inner Join: supplier.s_nationkey = nation.n_nationkey + TableScan: supplier projection=[s_suppkey, s_name, s_address, s_nationkey] + Projection: nation.n_nationkey + Filter: nation.n_name = Utf8("KENYA") + TableScan: nation projection=[n_nationkey, n_name], partial_filters=[nation.n_name = Utf8("KENYA")] + SubqueryAlias: __correlated_sq_1 + Projection: partsupp.ps_suppkey + Inner Join: partsupp.ps_partkey = __scalar_sq_3.l_partkey, partsupp.ps_suppkey = __scalar_sq_3.l_suppkey Filter: CAST(partsupp.ps_availqty AS Float64) > __scalar_sq_3.Float64(0.5) * sum(lineitem.l_quantity) + LeftSemi Join: partsupp.ps_partkey = __correlated_sq_2.p_partkey + TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_availqty] + SubqueryAlias: __correlated_sq_2 + Projection: part.p_partkey + Filter: part.p_name LIKE Utf8("blanched%") + TableScan: part projection=[p_partkey, p_name], partial_filters=[part.p_name LIKE Utf8("blanched%")] + SubqueryAlias: __scalar_sq_3 + Projection: Float64(0.5) * CAST(sum(lineitem.l_quantity) AS Float64), lineitem.l_partkey, lineitem.l_suppkey + Aggregate: groupBy=[[lineitem.l_partkey, lineitem.l_suppkey]], aggr=[[sum(lineitem.l_quantity)]] + Projection: lineitem.l_partkey, lineitem.l_suppkey, 
lineitem.l_quantity + Filter: lineitem.l_shipdate >= Date32("1993-01-01") AND lineitem.l_shipdate < Date32("1994-01-01") + TableScan: lineitem projection=[l_partkey, l_suppkey, l_quantity, l_shipdate], partial_filters=[lineitem.l_shipdate >= Date32("1993-01-01"), lineitem.l_shipdate < Date32("1994-01-01")] + +DataFusion Physical Plan +======================== + +SortExec: expr=[s_name@0 ASC NULLS LAST], preserve_partitioning=[false] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=LeftSemi, on=[(s_suppkey@0, ps_suppkey@0)], projection=[s_name@1, s_address@2] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@3)], projection=[s_suppkey@1, s_name@2, s_address@3] + ProjectionExec: expr=[n_nationkey@0 as n_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: n_name@1 = KENYA + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = KENYA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= KENYA AND KENYA <= n_name_max@1 END, required_guarantees=[n_name in (KENYA)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_name, s_address, s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(ps_partkey@0, l_partkey@1), (ps_suppkey@1, l_suppkey@2)], filter=CAST(ps_availqty@0 AS Float64) > Float64(0.5) * sum(lineitem.l_quantity)@1, projection=[ps_suppkey@1] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=RightSemi, on=[(p_partkey@0, ps_partkey@0)] + ProjectionExec: expr=[p_partkey@0 as p_partkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: p_name@1 LIKE blanched% + ParquetExec: 
file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_name], predicate=p_name@1 LIKE blanched% + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet:0..41093580]]}, projection=[ps_partkey, ps_suppkey, ps_availqty] + ProjectionExec: expr=[0.5 * CAST(sum(lineitem.l_quantity)@2 AS Float64) as Float64(0.5) * sum(lineitem.l_quantity), l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey] + AggregateExec: mode=Single, gby=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey], aggr=[sum(lineitem.l_quantity)] + ProjectionExec: expr=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey, l_quantity@2 as l_quantity] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: l_shipdate@3 >= 1993-01-01 AND l_shipdate@3 < 1994-01-01 + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_partkey, l_suppkey, l_quantity, l_shipdate], predicate=l_shipdate@10 >= 1993-01-01 AND l_shipdate@10 < 1994-01-01, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1993-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1994-01-01 END, required_guarantees=[] + +RaySQL Plan +=========== + +Query Stage #0 (1 -> 1): +SortExec: expr=[s_name@0 ASC NULLS LAST], preserve_partitioning=[false] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=LeftSemi, on=[(s_suppkey@0, ps_suppkey@0)], projection=[s_name@1, s_address@2] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@3)], projection=[s_suppkey@1, s_name@2, s_address@3] + ProjectionExec: expr=[n_nationkey@0 as n_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: n_name@1 = KENYA + ParquetExec: 
file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = KENYA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= KENYA AND KENYA <= n_name_max@1 END, required_guarantees=[n_name in (KENYA)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_name, s_address, s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(ps_partkey@0, l_partkey@1), (ps_suppkey@1, l_suppkey@2)], filter=CAST(ps_availqty@0 AS Float64) > Float64(0.5) * sum(lineitem.l_quantity)@1, projection=[ps_suppkey@1] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=RightSemi, on=[(p_partkey@0, ps_partkey@0)] + ProjectionExec: expr=[p_partkey@0 as p_partkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: p_name@1 LIKE blanched% + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_name], predicate=p_name@1 LIKE blanched% + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet:0..41093580]]}, projection=[ps_partkey, ps_suppkey, ps_availqty] + ProjectionExec: expr=[0.5 * CAST(sum(lineitem.l_quantity)@2 AS Float64) as Float64(0.5) * sum(lineitem.l_quantity), l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey] + AggregateExec: mode=Single, gby=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey], aggr=[sum(lineitem.l_quantity)] + ProjectionExec: expr=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey, l_quantity@2 as l_quantity] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: l_shipdate@3 >= 1993-01-01 AND l_shipdate@3 < 1994-01-01 + ParquetExec: file_groups={1 group: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_partkey, l_suppkey, l_quantity, l_shipdate], predicate=l_shipdate@10 >= 1993-01-01 AND l_shipdate@10 < 1994-01-01, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1993-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1994-01-01 END, required_guarantees=[] + diff --git a/testdata/expected-plans/q21.txt b/testdata/expected-plans/q21.txt new file mode 100644 index 0000000..7c4f7a4 --- /dev/null +++ b/testdata/expected-plans/q21.txt @@ -0,0 +1,107 @@ +DataFusion Logical Plan +======================= + +Limit: skip=0, fetch=100 + Sort: numwait DESC NULLS FIRST, supplier.s_name ASC NULLS LAST, fetch=100 + Projection: supplier.s_name, count(*) AS numwait + Aggregate: groupBy=[[supplier.s_name]], aggr=[[count(Int64(1)) AS count(*)]] + Projection: supplier.s_name + LeftAnti Join: l1.l_orderkey = __correlated_sq_2.l_orderkey Filter: __correlated_sq_2.l_suppkey != l1.l_suppkey + LeftSemi Join: l1.l_orderkey = __correlated_sq_1.l_orderkey Filter: __correlated_sq_1.l_suppkey != l1.l_suppkey + Projection: supplier.s_name, l1.l_orderkey, l1.l_suppkey + Inner Join: supplier.s_nationkey = nation.n_nationkey + Projection: supplier.s_name, supplier.s_nationkey, l1.l_orderkey, l1.l_suppkey + Inner Join: l1.l_orderkey = orders.o_orderkey + Projection: supplier.s_name, supplier.s_nationkey, l1.l_orderkey, l1.l_suppkey + Inner Join: supplier.s_suppkey = l1.l_suppkey + TableScan: supplier projection=[s_suppkey, s_name, s_nationkey] + SubqueryAlias: l1 + Projection: lineitem.l_orderkey, lineitem.l_suppkey + Filter: lineitem.l_receiptdate > lineitem.l_commitdate + TableScan: lineitem projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], partial_filters=[lineitem.l_receiptdate > lineitem.l_commitdate] + Projection: orders.o_orderkey + Filter: 
orders.o_orderstatus = Utf8("F") + TableScan: orders projection=[o_orderkey, o_orderstatus], partial_filters=[orders.o_orderstatus = Utf8("F")] + Projection: nation.n_nationkey + Filter: nation.n_name = Utf8("ARGENTINA") + TableScan: nation projection=[n_nationkey, n_name], partial_filters=[nation.n_name = Utf8("ARGENTINA")] + SubqueryAlias: __correlated_sq_1 + SubqueryAlias: l2 + TableScan: lineitem projection=[l_orderkey, l_suppkey] + SubqueryAlias: __correlated_sq_2 + SubqueryAlias: l3 + Projection: lineitem.l_orderkey, lineitem.l_suppkey + Filter: lineitem.l_receiptdate > lineitem.l_commitdate + TableScan: lineitem projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], partial_filters=[lineitem.l_receiptdate > lineitem.l_commitdate] + +DataFusion Physical Plan +======================== + +SortExec: TopK(fetch=100), expr=[numwait@1 DESC,s_name@0 ASC NULLS LAST], preserve_partitioning=[false] + ProjectionExec: expr=[s_name@0 as s_name, count(*)@1 as numwait] + AggregateExec: mode=Single, gby=[s_name@0 as s_name], aggr=[count(*)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=LeftAnti, on=[(l_orderkey@1, l_orderkey@0)], filter=l_suppkey@1 != l_suppkey@0, projection=[s_name@0] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=LeftSemi, on=[(l_orderkey@1, l_orderkey@0)], filter=l_suppkey@1 != l_suppkey@0 + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@1)], projection=[s_name@1, l_orderkey@3, l_suppkey@4] + ProjectionExec: expr=[n_nationkey@0 as n_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: n_name@1 = ARGENTINA + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ARGENTINA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE 
n_name_min@0 <= ARGENTINA AND ARGENTINA <= n_name_max@1 END, required_guarantees=[n_name in (ARGENTINA)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(o_orderkey@0, l_orderkey@2)], projection=[s_name@1, s_nationkey@2, l_orderkey@3, l_suppkey@4] + ProjectionExec: expr=[o_orderkey@0 as o_orderkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: o_orderstatus@1 = F + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54533415]]}, projection=[o_orderkey, o_orderstatus], predicate=o_orderstatus@2 = F, pruning_predicate=CASE WHEN o_orderstatus_null_count@2 = o_orderstatus_row_count@3 THEN false ELSE o_orderstatus_min@0 <= F AND F <= o_orderstatus_max@1 END, required_guarantees=[o_orderstatus in (F)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1)], projection=[s_name@1, s_nationkey@2, l_orderkey@3, l_suppkey@4] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_name, s_nationkey] + ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_suppkey@1 as l_suppkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: l_receiptdate@3 > l_commitdate@2 + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_orderkey, l_suppkey] + ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_suppkey@1 as l_suppkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: l_receiptdate@3 > l_commitdate@2 + ParquetExec: file_groups={1 group: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 + +RaySQL Plan +=========== + +Query Stage #0 (1 -> 1): +SortExec: TopK(fetch=100), expr=[numwait@1 DESC,s_name@0 ASC NULLS LAST], preserve_partitioning=[false] + ProjectionExec: expr=[s_name@0 as s_name, count(*)@1 as numwait] + AggregateExec: mode=Single, gby=[s_name@0 as s_name], aggr=[count(*)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=LeftAnti, on=[(l_orderkey@1, l_orderkey@0)], filter=l_suppkey@1 != l_suppkey@0, projection=[s_name@0] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=LeftSemi, on=[(l_orderkey@1, l_orderkey@0)], filter=l_suppkey@1 != l_suppkey@0 + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@1)], projection=[s_name@1, l_orderkey@3, l_suppkey@4] + ProjectionExec: expr=[n_nationkey@0 as n_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: n_name@1 = ARGENTINA + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ARGENTINA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ARGENTINA AND ARGENTINA <= n_name_max@1 END, required_guarantees=[n_name in (ARGENTINA)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(o_orderkey@0, l_orderkey@2)], projection=[s_name@1, s_nationkey@2, l_orderkey@3, l_suppkey@4] + ProjectionExec: expr=[o_orderkey@0 as o_orderkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: o_orderstatus@1 = F + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54533415]]}, 
projection=[o_orderkey, o_orderstatus], predicate=o_orderstatus@2 = F, pruning_predicate=CASE WHEN o_orderstatus_null_count@2 = o_orderstatus_row_count@3 THEN false ELSE o_orderstatus_min@0 <= F AND F <= o_orderstatus_max@1 END, required_guarantees=[o_orderstatus in (F)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1)], projection=[s_name@1, s_nationkey@2, l_orderkey@3, l_suppkey@4] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_name, s_nationkey] + ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_suppkey@1 as l_suppkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: l_receiptdate@3 > l_commitdate@2 + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_orderkey, l_suppkey] + ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_suppkey@1 as l_suppkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: l_receiptdate@3 > l_commitdate@2 + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 + diff --git a/testdata/expected-plans/q22.txt b/testdata/expected-plans/q22.txt new file mode 100644 index 0000000..21a5e01 --- /dev/null +++ b/testdata/expected-plans/q22.txt @@ -0,0 +1,62 @@ +DataFusion Logical Plan +======================= + +Sort: custsale.cntrycode ASC NULLS LAST + Projection: custsale.cntrycode, count(*) AS numcust, sum(custsale.c_acctbal) AS totacctbal + Aggregate: 
groupBy=[[custsale.cntrycode]], aggr=[[count(Int64(1)) AS count(*), sum(custsale.c_acctbal)]] + SubqueryAlias: custsale + Projection: substr(customer.c_phone, Int64(1), Int64(2)) AS cntrycode, customer.c_acctbal + Inner Join: Filter: CAST(customer.c_acctbal AS Decimal128(15, 6)) > __scalar_sq_2.avg(customer.c_acctbal) + Projection: customer.c_phone, customer.c_acctbal + LeftAnti Join: customer.c_custkey = __correlated_sq_1.o_custkey + Filter: substr(customer.c_phone, Int64(1), Int64(2)) IN ([Utf8("24"), Utf8("34"), Utf8("16"), Utf8("30"), Utf8("33"), Utf8("14"), Utf8("13")]) + TableScan: customer projection=[c_custkey, c_phone, c_acctbal], partial_filters=[substr(customer.c_phone, Int64(1), Int64(2)) IN ([Utf8("24"), Utf8("34"), Utf8("16"), Utf8("30"), Utf8("33"), Utf8("14"), Utf8("13")])] + SubqueryAlias: __correlated_sq_1 + TableScan: orders projection=[o_custkey] + SubqueryAlias: __scalar_sq_2 + Aggregate: groupBy=[[]], aggr=[[avg(customer.c_acctbal)]] + Projection: customer.c_acctbal + Filter: customer.c_acctbal > Decimal128(Some(0),11,2) AND substr(customer.c_phone, Int64(1), Int64(2)) IN ([Utf8("24"), Utf8("34"), Utf8("16"), Utf8("30"), Utf8("33"), Utf8("14"), Utf8("13")]) + TableScan: customer projection=[c_phone, c_acctbal], partial_filters=[customer.c_acctbal > Decimal128(Some(0),11,2) AS customer.c_acctbal > Decimal128(Some(0),30,15), substr(customer.c_phone, Int64(1), Int64(2)) IN ([Utf8("24"), Utf8("34"), Utf8("16"), Utf8("30"), Utf8("33"), Utf8("14"), Utf8("13")]), customer.c_acctbal > Decimal128(Some(0),11,2)] + +DataFusion Physical Plan +======================== + +SortExec: expr=[cntrycode@0 ASC NULLS LAST], preserve_partitioning=[false] + ProjectionExec: expr=[cntrycode@0 as cntrycode, count(*)@1 as numcust, sum(custsale.c_acctbal)@2 as totacctbal] + AggregateExec: mode=Single, gby=[cntrycode@0 as cntrycode], aggr=[count(*), sum(custsale.c_acctbal)] + ProjectionExec: expr=[substr(c_phone@1, 1, 2) as cntrycode, c_acctbal@2 as c_acctbal] + 
NestedLoopJoinExec: join_type=Inner, filter=CAST(c_acctbal@0 AS Decimal128(15, 6)) > avg(customer.c_acctbal)@1 + AggregateExec: mode=Single, gby=[], aggr=[avg(customer.c_acctbal)] + ProjectionExec: expr=[c_acctbal@1 as c_acctbal] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: c_acctbal@1 > Some(0),11,2 AND Use substr(c_phone@0, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13210681]]}, projection=[c_phone, c_acctbal], predicate=c_acctbal@5 > Some(0),11,2 AND Use substr(c_phone@4, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) AND c_acctbal@5 > Some(0),11,2, pruning_predicate=CASE WHEN c_acctbal_null_count@1 = c_acctbal_row_count@2 THEN false ELSE c_acctbal_max@0 > Some(0),11,2 END AND CASE WHEN c_acctbal_null_count@1 = c_acctbal_row_count@2 THEN false ELSE c_acctbal_max@0 > Some(0),11,2 END, required_guarantees=[] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=LeftAnti, on=[(c_custkey@0, o_custkey@0)], projection=[c_phone@1, c_acctbal@2] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: Use substr(c_phone@1, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13210681]]}, projection=[c_custkey, c_phone, c_acctbal], predicate=Use 
substr(c_phone@4, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54533415]]}, projection=[o_custkey] + +RaySQL Plan +=========== + +Query Stage #0 (1 -> 1): +SortExec: expr=[cntrycode@0 ASC NULLS LAST], preserve_partitioning=[false] + ProjectionExec: expr=[cntrycode@0 as cntrycode, count(*)@1 as numcust, sum(custsale.c_acctbal)@2 as totacctbal] + AggregateExec: mode=Single, gby=[cntrycode@0 as cntrycode], aggr=[count(*), sum(custsale.c_acctbal)] + ProjectionExec: expr=[substr(c_phone@1, 1, 2) as cntrycode, c_acctbal@2 as c_acctbal] + NestedLoopJoinExec: join_type=Inner, filter=CAST(c_acctbal@0 AS Decimal128(15, 6)) > avg(customer.c_acctbal)@1 + AggregateExec: mode=Single, gby=[], aggr=[avg(customer.c_acctbal)] + ProjectionExec: expr=[c_acctbal@1 as c_acctbal] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: c_acctbal@1 > Some(0),11,2 AND Use substr(c_phone@0, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13210681]]}, projection=[c_phone, c_acctbal], predicate=c_acctbal@5 > Some(0),11,2 AND Use substr(c_phone@4, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) AND c_acctbal@5 > Some(0),11,2, pruning_predicate=CASE WHEN c_acctbal_null_count@1 = c_acctbal_row_count@2 THEN false ELSE 
c_acctbal_max@0 > Some(0),11,2 END AND CASE WHEN c_acctbal_null_count@1 = c_acctbal_row_count@2 THEN false ELSE c_acctbal_max@0 > Some(0),11,2 END, required_guarantees=[] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=LeftAnti, on=[(c_custkey@0, o_custkey@0)], projection=[c_phone@1, c_acctbal@2] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: Use substr(c_phone@1, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13210681]]}, projection=[c_custkey, c_phone, c_acctbal], predicate=Use substr(c_phone@4, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54533415]]}, projection=[o_custkey] + diff --git a/testdata/expected-plans/q3.txt b/testdata/expected-plans/q3.txt new file mode 100644 index 0000000..b75c633 --- /dev/null +++ b/testdata/expected-plans/q3.txt @@ -0,0 +1,65 @@ +DataFusion Logical Plan +======================= + +Limit: skip=0, fetch=10 + Sort: revenue DESC NULLS FIRST, orders.o_orderdate ASC NULLS LAST, fetch=10 + Projection: lineitem.l_orderkey, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS revenue, orders.o_orderdate, orders.o_shippriority + Aggregate: groupBy=[[lineitem.l_orderkey, orders.o_orderdate, orders.o_shippriority]], aggr=[[sum(lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - lineitem.l_discount)) AS sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]] + Projection: 
orders.o_orderdate, orders.o_shippriority, lineitem.l_orderkey, lineitem.l_extendedprice, lineitem.l_discount + Inner Join: orders.o_orderkey = lineitem.l_orderkey + Projection: orders.o_orderkey, orders.o_orderdate, orders.o_shippriority + Inner Join: customer.c_custkey = orders.o_custkey + Projection: customer.c_custkey + Filter: customer.c_mktsegment = Utf8("BUILDING") + TableScan: customer projection=[c_custkey, c_mktsegment], partial_filters=[customer.c_mktsegment = Utf8("BUILDING")] + Filter: orders.o_orderdate < Date32("1995-03-15") + TableScan: orders projection=[o_orderkey, o_custkey, o_orderdate, o_shippriority], partial_filters=[orders.o_orderdate < Date32("1995-03-15")] + Projection: lineitem.l_orderkey, lineitem.l_extendedprice, lineitem.l_discount + Filter: lineitem.l_shipdate > Date32("1995-03-15") + TableScan: lineitem projection=[l_orderkey, l_extendedprice, l_discount, l_shipdate], partial_filters=[lineitem.l_shipdate > Date32("1995-03-15")] + +DataFusion Physical Plan +======================== + +SortExec: TopK(fetch=10), expr=[revenue@1 DESC,o_orderdate@2 ASC NULLS LAST], preserve_partitioning=[false] + ProjectionExec: expr=[l_orderkey@0 as l_orderkey, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@3 as revenue, o_orderdate@1 as o_orderdate, o_shippriority@2 as o_shippriority] + AggregateExec: mode=Single, gby=[l_orderkey@2 as l_orderkey, o_orderdate@0 as o_orderdate, o_shippriority@1 as o_shippriority], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_orderdate@1, o_shippriority@2, l_orderkey@3, l_extendedprice@4, l_discount@5] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[o_orderkey@1, o_orderdate@3, o_shippriority@4] + ProjectionExec: expr=[c_custkey@0 as c_custkey] + 
CoalesceBatchesExec: target_batch_size=8192 + FilterExec: c_mktsegment@1 = BUILDING + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13210681]]}, projection=[c_custkey, c_mktsegment], predicate=c_mktsegment@6 = BUILDING, pruning_predicate=CASE WHEN c_mktsegment_null_count@2 = c_mktsegment_row_count@3 THEN false ELSE c_mktsegment_min@0 <= BUILDING AND BUILDING <= c_mktsegment_max@1 END, required_guarantees=[c_mktsegment in (BUILDING)] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: o_orderdate@2 < 1995-03-15 + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54533415]]}, projection=[o_orderkey, o_custkey, o_orderdate, o_shippriority], predicate=o_orderdate@4 < 1995-03-15, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@0 < 1995-03-15 END, required_guarantees=[] + ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: l_shipdate@3 > 1995-03-15 + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_orderkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 > 1995-03-15, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 > 1995-03-15 END, required_guarantees=[] + +RaySQL Plan +=========== + +Query Stage #0 (1 -> 1): +SortExec: TopK(fetch=10), expr=[revenue@1 DESC,o_orderdate@2 ASC NULLS LAST], preserve_partitioning=[false] + ProjectionExec: expr=[l_orderkey@0 as l_orderkey, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@3 as revenue, o_orderdate@1 as o_orderdate, o_shippriority@2 as o_shippriority] + AggregateExec: mode=Single, gby=[l_orderkey@2 as l_orderkey, o_orderdate@0 as 
o_orderdate, o_shippriority@1 as o_shippriority], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_orderdate@1, o_shippriority@2, l_orderkey@3, l_extendedprice@4, l_discount@5] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[o_orderkey@1, o_orderdate@3, o_shippriority@4] + ProjectionExec: expr=[c_custkey@0 as c_custkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: c_mktsegment@1 = BUILDING + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13210681]]}, projection=[c_custkey, c_mktsegment], predicate=c_mktsegment@6 = BUILDING, pruning_predicate=CASE WHEN c_mktsegment_null_count@2 = c_mktsegment_row_count@3 THEN false ELSE c_mktsegment_min@0 <= BUILDING AND BUILDING <= c_mktsegment_max@1 END, required_guarantees=[c_mktsegment in (BUILDING)] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: o_orderdate@2 < 1995-03-15 + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54533415]]}, projection=[o_orderkey, o_custkey, o_orderdate, o_shippriority], predicate=o_orderdate@4 < 1995-03-15, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@0 < 1995-03-15 END, required_guarantees=[] + ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: l_shipdate@3 > 1995-03-15 + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_orderkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 > 1995-03-15, 
pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 > 1995-03-15 END, required_guarantees=[] + diff --git a/testdata/expected-plans/q4.txt b/testdata/expected-plans/q4.txt new file mode 100644 index 0000000..675f134 --- /dev/null +++ b/testdata/expected-plans/q4.txt @@ -0,0 +1,51 @@ +DataFusion Logical Plan +======================= + +Sort: orders.o_orderpriority ASC NULLS LAST + Projection: orders.o_orderpriority, count(*) AS order_count + Aggregate: groupBy=[[orders.o_orderpriority]], aggr=[[count(Int64(1)) AS count(*)]] + Projection: orders.o_orderpriority + LeftSemi Join: orders.o_orderkey = __correlated_sq_1.l_orderkey + Projection: orders.o_orderkey, orders.o_orderpriority + Filter: orders.o_orderdate >= Date32("1995-04-01") AND orders.o_orderdate < Date32("1995-07-01") + TableScan: orders projection=[o_orderkey, o_orderdate, o_orderpriority], partial_filters=[orders.o_orderdate >= Date32("1995-04-01"), orders.o_orderdate < Date32("1995-07-01")] + SubqueryAlias: __correlated_sq_1 + Projection: lineitem.l_orderkey + Filter: lineitem.l_receiptdate > lineitem.l_commitdate + TableScan: lineitem projection=[l_orderkey, l_commitdate, l_receiptdate], partial_filters=[lineitem.l_receiptdate > lineitem.l_commitdate] + +DataFusion Physical Plan +======================== + +SortExec: expr=[o_orderpriority@0 ASC NULLS LAST], preserve_partitioning=[false] + ProjectionExec: expr=[o_orderpriority@0 as o_orderpriority, count(*)@1 as order_count] + AggregateExec: mode=Single, gby=[o_orderpriority@0 as o_orderpriority], aggr=[count(*)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=LeftSemi, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_orderpriority@1] + ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_orderpriority@2 as o_orderpriority] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: o_orderdate@1 >= 1995-04-01 AND o_orderdate@1 < 1995-07-01 + ParquetExec: 
file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54533415]]}, projection=[o_orderkey, o_orderdate, o_orderpriority], predicate=o_orderdate@4 >= 1995-04-01 AND o_orderdate@4 < 1995-07-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1995-04-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1995-07-01 END, required_guarantees=[] + ProjectionExec: expr=[l_orderkey@0 as l_orderkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: l_receiptdate@2 > l_commitdate@1 + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_orderkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 + +RaySQL Plan +=========== + +Query Stage #0 (1 -> 1): +SortExec: expr=[o_orderpriority@0 ASC NULLS LAST], preserve_partitioning=[false] + ProjectionExec: expr=[o_orderpriority@0 as o_orderpriority, count(*)@1 as order_count] + AggregateExec: mode=Single, gby=[o_orderpriority@0 as o_orderpriority], aggr=[count(*)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=LeftSemi, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_orderpriority@1] + ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_orderpriority@2 as o_orderpriority] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: o_orderdate@1 >= 1995-04-01 AND o_orderdate@1 < 1995-07-01 + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54533415]]}, projection=[o_orderkey, o_orderdate, o_orderpriority], predicate=o_orderdate@4 >= 1995-04-01 AND o_orderdate@4 < 1995-07-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1995-04-01 END AND CASE WHEN o_orderdate_null_count@1 = 
o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1995-07-01 END, required_guarantees=[] + ProjectionExec: expr=[l_orderkey@0 as l_orderkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: l_receiptdate@2 > l_commitdate@1 + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_orderkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 + diff --git a/testdata/expected-plans/q5.txt b/testdata/expected-plans/q5.txt new file mode 100644 index 0000000..8cfea51 --- /dev/null +++ b/testdata/expected-plans/q5.txt @@ -0,0 +1,90 @@ +DataFusion Logical Plan +======================= + +Sort: revenue DESC NULLS FIRST + Projection: nation.n_name, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS revenue + Aggregate: groupBy=[[nation.n_name]], aggr=[[sum(lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - lineitem.l_discount)) AS sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]] + Projection: lineitem.l_extendedprice, lineitem.l_discount, nation.n_name + Inner Join: nation.n_regionkey = region.r_regionkey + Projection: lineitem.l_extendedprice, lineitem.l_discount, nation.n_name, nation.n_regionkey + Inner Join: supplier.s_nationkey = nation.n_nationkey + Projection: lineitem.l_extendedprice, lineitem.l_discount, supplier.s_nationkey + Inner Join: lineitem.l_suppkey = supplier.s_suppkey, customer.c_nationkey = supplier.s_nationkey + Projection: customer.c_nationkey, lineitem.l_suppkey, lineitem.l_extendedprice, lineitem.l_discount + Inner Join: orders.o_orderkey = lineitem.l_orderkey + Projection: customer.c_nationkey, orders.o_orderkey + Inner Join: customer.c_custkey = orders.o_custkey + TableScan: customer projection=[c_custkey, c_nationkey] + Projection: orders.o_orderkey, orders.o_custkey + Filter: orders.o_orderdate >= Date32("1994-01-01") AND orders.o_orderdate < Date32("1995-01-01") + TableScan: orders 
projection=[o_orderkey, o_custkey, o_orderdate], partial_filters=[orders.o_orderdate >= Date32("1994-01-01"), orders.o_orderdate < Date32("1995-01-01")] + TableScan: lineitem projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount] + TableScan: supplier projection=[s_suppkey, s_nationkey] + TableScan: nation projection=[n_nationkey, n_name, n_regionkey] + Projection: region.r_regionkey + Filter: region.r_name = Utf8("AFRICA") + TableScan: region projection=[r_regionkey, r_name], partial_filters=[region.r_name = Utf8("AFRICA")] + +DataFusion Physical Plan +======================== + +SortExec: expr=[revenue@1 DESC], preserve_partitioning=[false] + ProjectionExec: expr=[n_name@0 as n_name, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@1 as revenue] + AggregateExec: mode=Single, gby=[n_name@2 as n_name], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(r_regionkey@0, n_regionkey@3)], projection=[l_extendedprice@1, l_discount@2, n_name@3] + ProjectionExec: expr=[r_regionkey@0 as r_regionkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: r_name@1 = AFRICA + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = AFRICA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= AFRICA AND AFRICA <= r_name_max@1 END, required_guarantees=[r_name in (AFRICA)] + ProjectionExec: expr=[l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, n_name@0 as n_name, n_regionkey@1 as n_regionkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[n_name@1, n_regionkey@2, l_extendedprice@3, l_discount@4] + ParquetExec: file_groups={1 group: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name, n_regionkey] + ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@0 as s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1), (s_nationkey@1, c_nationkey@0)], projection=[s_nationkey@1, l_extendedprice@4, l_discount@5] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(o_orderkey@1, l_orderkey@0)], projection=[c_nationkey@0, l_suppkey@3, l_extendedprice@4, l_discount@5] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_nationkey@1, o_orderkey@2] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13210681]]}, projection=[c_custkey, c_nationkey] + ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: o_orderdate@2 >= 1994-01-01 AND o_orderdate@2 < 1995-01-01 + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54533415]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1994-01-01 AND o_orderdate@4 < 1995-01-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1994-01-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1995-01-01 END, required_guarantees=[] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, 
projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount] + +RaySQL Plan +=========== + +Query Stage #0 (1 -> 1): +SortExec: expr=[revenue@1 DESC], preserve_partitioning=[false] + ProjectionExec: expr=[n_name@0 as n_name, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@1 as revenue] + AggregateExec: mode=Single, gby=[n_name@2 as n_name], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(r_regionkey@0, n_regionkey@3)], projection=[l_extendedprice@1, l_discount@2, n_name@3] + ProjectionExec: expr=[r_regionkey@0 as r_regionkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: r_name@1 = AFRICA + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = AFRICA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= AFRICA AND AFRICA <= r_name_max@1 END, required_guarantees=[r_name in (AFRICA)] + ProjectionExec: expr=[l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, n_name@0 as n_name, n_regionkey@1 as n_regionkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[n_name@1, n_regionkey@2, l_extendedprice@3, l_discount@4] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name, n_regionkey] + ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@0 as s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1), (s_nationkey@1, c_nationkey@0)], projection=[s_nationkey@1, l_extendedprice@4, l_discount@5] + ParquetExec: file_groups={1 group: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(o_orderkey@1, l_orderkey@0)], projection=[c_nationkey@0, l_suppkey@3, l_extendedprice@4, l_discount@5] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_nationkey@1, o_orderkey@2] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13210681]]}, projection=[c_custkey, c_nationkey] + ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: o_orderdate@2 >= 1994-01-01 AND o_orderdate@2 < 1995-01-01 + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54533415]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1994-01-01 AND o_orderdate@4 < 1995-01-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1994-01-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1995-01-01 END, required_guarantees=[] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount] + diff --git a/testdata/expected-plans/q6.txt b/testdata/expected-plans/q6.txt new file mode 100644 index 0000000..7d6ff2d --- /dev/null +++ b/testdata/expected-plans/q6.txt @@ -0,0 +1,30 @@ +DataFusion Logical Plan +======================= + +Projection: sum(lineitem.l_extendedprice * lineitem.l_discount) AS revenue + Aggregate: groupBy=[[]], aggr=[[sum(lineitem.l_extendedprice * lineitem.l_discount)]] + Projection: lineitem.l_extendedprice, 
lineitem.l_discount + Filter: lineitem.l_shipdate >= Date32("1994-01-01") AND lineitem.l_shipdate < Date32("1995-01-01") AND lineitem.l_discount >= Decimal128(Some(3),11,2) AND lineitem.l_discount <= Decimal128(Some(5),11,2) AND lineitem.l_quantity < Decimal128(Some(2400),11,2) + TableScan: lineitem projection=[l_quantity, l_extendedprice, l_discount, l_shipdate], partial_filters=[lineitem.l_shipdate >= Date32("1994-01-01"), lineitem.l_shipdate < Date32("1995-01-01"), lineitem.l_discount >= Decimal128(Some(3),11,2), lineitem.l_discount <= Decimal128(Some(5),11,2), lineitem.l_quantity < Decimal128(Some(2400),11,2)] + +DataFusion Physical Plan +======================== + +ProjectionExec: expr=[sum(lineitem.l_extendedprice * lineitem.l_discount)@0 as revenue] + AggregateExec: mode=Single, gby=[], aggr=[sum(lineitem.l_extendedprice * lineitem.l_discount)] + ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: l_shipdate@3 >= 1994-01-01 AND l_shipdate@3 < 1995-01-01 AND l_discount@2 >= Some(3),11,2 AND l_discount@2 <= Some(5),11,2 AND l_quantity@0 < Some(2400),11,2 + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_quantity, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1994-01-01 AND l_shipdate@10 < 1995-01-01 AND l_discount@6 >= Some(3),11,2 AND l_discount@6 <= Some(5),11,2 AND l_quantity@4 < Some(2400),11,2, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1994-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1995-01-01 END AND CASE WHEN l_discount_null_count@5 = l_discount_row_count@6 THEN false ELSE l_discount_max@4 >= Some(3),11,2 END AND CASE WHEN l_discount_null_count@5 = l_discount_row_count@6 THEN false ELSE l_discount_min@7 <= 
Some(5),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@8 < Some(2400),11,2 END, required_guarantees=[] + +RaySQL Plan +=========== + +Query Stage #0 (1 -> 1): +ProjectionExec: expr=[sum(lineitem.l_extendedprice * lineitem.l_discount)@0 as revenue] + AggregateExec: mode=Single, gby=[], aggr=[sum(lineitem.l_extendedprice * lineitem.l_discount)] + ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: l_shipdate@3 >= 1994-01-01 AND l_shipdate@3 < 1995-01-01 AND l_discount@2 >= Some(3),11,2 AND l_discount@2 <= Some(5),11,2 AND l_quantity@0 < Some(2400),11,2 + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_quantity, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1994-01-01 AND l_shipdate@10 < 1995-01-01 AND l_discount@6 >= Some(3),11,2 AND l_discount@6 <= Some(5),11,2 AND l_quantity@4 < Some(2400),11,2, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1994-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1995-01-01 END AND CASE WHEN l_discount_null_count@5 = l_discount_row_count@6 THEN false ELSE l_discount_max@4 >= Some(3),11,2 END AND CASE WHEN l_discount_null_count@5 = l_discount_row_count@6 THEN false ELSE l_discount_min@7 <= Some(5),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@8 < Some(2400),11,2 END, required_guarantees=[] + diff --git a/testdata/expected-plans/q7.txt b/testdata/expected-plans/q7.txt new file mode 100644 index 0000000..2d820c5 --- /dev/null +++ b/testdata/expected-plans/q7.txt @@ -0,0 +1,94 @@ +DataFusion Logical Plan +======================= + +Sort: shipping.supp_nation ASC NULLS LAST, shipping.cust_nation ASC 
NULLS LAST, shipping.l_year ASC NULLS LAST + Projection: shipping.supp_nation, shipping.cust_nation, shipping.l_year, sum(shipping.volume) AS revenue + Aggregate: groupBy=[[shipping.supp_nation, shipping.cust_nation, shipping.l_year]], aggr=[[sum(shipping.volume)]] + SubqueryAlias: shipping + Projection: n1.n_name AS supp_nation, n2.n_name AS cust_nation, date_part(Utf8("YEAR"), lineitem.l_shipdate) AS l_year, lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - lineitem.l_discount) AS volume + Inner Join: customer.c_nationkey = n2.n_nationkey Filter: n1.n_name = Utf8("GERMANY") AND n2.n_name = Utf8("IRAQ") OR n1.n_name = Utf8("IRAQ") AND n2.n_name = Utf8("GERMANY") + Projection: lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_shipdate, customer.c_nationkey, n1.n_name + Inner Join: supplier.s_nationkey = n1.n_nationkey + Projection: supplier.s_nationkey, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_shipdate, customer.c_nationkey + Inner Join: orders.o_custkey = customer.c_custkey + Projection: supplier.s_nationkey, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_shipdate, orders.o_custkey + Inner Join: lineitem.l_orderkey = orders.o_orderkey + Projection: supplier.s_nationkey, lineitem.l_orderkey, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_shipdate + Inner Join: supplier.s_suppkey = lineitem.l_suppkey + TableScan: supplier projection=[s_suppkey, s_nationkey] + Filter: lineitem.l_shipdate >= Date32("1995-01-01") AND lineitem.l_shipdate <= Date32("1996-12-31") + TableScan: lineitem projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount, l_shipdate], partial_filters=[lineitem.l_shipdate >= Date32("1995-01-01"), lineitem.l_shipdate <= Date32("1996-12-31")] + TableScan: orders projection=[o_orderkey, o_custkey] + TableScan: customer projection=[c_custkey, c_nationkey] + SubqueryAlias: n1 + Filter: nation.n_name = Utf8("GERMANY") OR nation.n_name = Utf8("IRAQ") + TableScan: nation projection=[n_nationkey, n_name], 
partial_filters=[nation.n_name = Utf8("GERMANY") OR nation.n_name = Utf8("IRAQ")] + SubqueryAlias: n2 + Filter: nation.n_name = Utf8("IRAQ") OR nation.n_name = Utf8("GERMANY") + TableScan: nation projection=[n_nationkey, n_name], partial_filters=[nation.n_name = Utf8("IRAQ") OR nation.n_name = Utf8("GERMANY")] + +DataFusion Physical Plan +======================== + +SortExec: expr=[supp_nation@0 ASC NULLS LAST,cust_nation@1 ASC NULLS LAST,l_year@2 ASC NULLS LAST], preserve_partitioning=[false] + ProjectionExec: expr=[supp_nation@0 as supp_nation, cust_nation@1 as cust_nation, l_year@2 as l_year, sum(shipping.volume)@3 as revenue] + AggregateExec: mode=Single, gby=[supp_nation@0 as supp_nation, cust_nation@1 as cust_nation, l_year@2 as l_year], aggr=[sum(shipping.volume)] + ProjectionExec: expr=[n_name@4 as supp_nation, n_name@0 as cust_nation, date_part(YEAR, l_shipdate@3) as l_year, l_extendedprice@1 * (Some(1),20,0 - l_discount@2) as volume] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, c_nationkey@3)], filter=n_name@0 = GERMANY AND n_name@1 = IRAQ OR n_name@0 = IRAQ AND n_name@1 = GERMANY, projection=[n_name@1, l_extendedprice@2, l_discount@3, l_shipdate@4, n_name@6] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: n_name@1 = IRAQ OR n_name@1 = GERMANY + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = IRAQ OR n_name@1 = GERMANY, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= IRAQ AND IRAQ <= n_name_max@1 END OR CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= GERMANY AND GERMANY <= n_name_max@1 END, required_guarantees=[n_name in (IRAQ, GERMANY)] + ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, l_shipdate@3 as l_shipdate, c_nationkey@4 as 
c_nationkey, n_name@0 as n_name] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@0)], projection=[n_name@1, l_extendedprice@3, l_discount@4, l_shipdate@5, c_nationkey@6] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: n_name@1 = GERMANY OR n_name@1 = IRAQ + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = GERMANY OR n_name@1 = IRAQ, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= GERMANY AND GERMANY <= n_name_max@1 END OR CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= IRAQ AND IRAQ <= n_name_max@1 END, required_guarantees=[n_name in (GERMANY, IRAQ)] + ProjectionExec: expr=[s_nationkey@1 as s_nationkey, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, l_shipdate@4 as l_shipdate, c_nationkey@0 as c_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(c_custkey@0, o_custkey@4)], projection=[c_nationkey@1, s_nationkey@2, l_extendedprice@3, l_discount@4, l_shipdate@5] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13210681]]}, projection=[c_custkey, c_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(l_orderkey@1, o_orderkey@0)], projection=[s_nationkey@0, l_extendedprice@2, l_discount@3, l_shipdate@4, o_custkey@6] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1)], projection=[s_nationkey@1, l_orderkey@2, l_extendedprice@4, l_discount@5, l_shipdate@6] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] + 
CoalesceBatchesExec: target_batch_size=8192 + FilterExec: l_shipdate@4 >= 1995-01-01 AND l_shipdate@4 <= 1996-12-31 + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1995-01-01 AND l_shipdate@10 <= 1996-12-31, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1995-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 <= 1996-12-31 END, required_guarantees=[] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54533415]]}, projection=[o_orderkey, o_custkey] + +RaySQL Plan +=========== + +Query Stage #0 (1 -> 1): +SortExec: expr=[supp_nation@0 ASC NULLS LAST,cust_nation@1 ASC NULLS LAST,l_year@2 ASC NULLS LAST], preserve_partitioning=[false] + ProjectionExec: expr=[supp_nation@0 as supp_nation, cust_nation@1 as cust_nation, l_year@2 as l_year, sum(shipping.volume)@3 as revenue] + AggregateExec: mode=Single, gby=[supp_nation@0 as supp_nation, cust_nation@1 as cust_nation, l_year@2 as l_year], aggr=[sum(shipping.volume)] + ProjectionExec: expr=[n_name@4 as supp_nation, n_name@0 as cust_nation, date_part(YEAR, l_shipdate@3) as l_year, l_extendedprice@1 * (Some(1),20,0 - l_discount@2) as volume] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, c_nationkey@3)], filter=n_name@0 = GERMANY AND n_name@1 = IRAQ OR n_name@0 = IRAQ AND n_name@1 = GERMANY, projection=[n_name@1, l_extendedprice@2, l_discount@3, l_shipdate@4, n_name@6] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: n_name@1 = IRAQ OR n_name@1 = GERMANY + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, 
n_name], predicate=n_name@1 = IRAQ OR n_name@1 = GERMANY, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= IRAQ AND IRAQ <= n_name_max@1 END OR CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= GERMANY AND GERMANY <= n_name_max@1 END, required_guarantees=[n_name in (IRAQ, GERMANY)] + ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, l_shipdate@3 as l_shipdate, c_nationkey@4 as c_nationkey, n_name@0 as n_name] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@0)], projection=[n_name@1, l_extendedprice@3, l_discount@4, l_shipdate@5, c_nationkey@6] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: n_name@1 = GERMANY OR n_name@1 = IRAQ + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = GERMANY OR n_name@1 = IRAQ, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= GERMANY AND GERMANY <= n_name_max@1 END OR CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= IRAQ AND IRAQ <= n_name_max@1 END, required_guarantees=[n_name in (GERMANY, IRAQ)] + ProjectionExec: expr=[s_nationkey@1 as s_nationkey, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, l_shipdate@4 as l_shipdate, c_nationkey@0 as c_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(c_custkey@0, o_custkey@4)], projection=[c_nationkey@1, s_nationkey@2, l_extendedprice@3, l_discount@4, l_shipdate@5] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13210681]]}, projection=[c_custkey, c_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, 
join_type=Inner, on=[(l_orderkey@1, o_orderkey@0)], projection=[s_nationkey@0, l_extendedprice@2, l_discount@3, l_shipdate@4, o_custkey@6] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1)], projection=[s_nationkey@1, l_orderkey@2, l_extendedprice@4, l_discount@5, l_shipdate@6] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: l_shipdate@4 >= 1995-01-01 AND l_shipdate@4 <= 1996-12-31 + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1995-01-01 AND l_shipdate@10 <= 1996-12-31, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1995-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 <= 1996-12-31 END, required_guarantees=[] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54533415]]}, projection=[o_orderkey, o_custkey] + diff --git a/testdata/expected-plans/q8.txt b/testdata/expected-plans/q8.txt new file mode 100644 index 0000000..0f2a05c --- /dev/null +++ b/testdata/expected-plans/q8.txt @@ -0,0 +1,124 @@ +DataFusion Logical Plan +======================= + +Sort: all_nations.o_year ASC NULLS LAST + Projection: all_nations.o_year, sum(CASE WHEN all_nations.nation = Utf8("IRAQ") THEN all_nations.volume ELSE Int64(0) END) / sum(all_nations.volume) AS mkt_share + Aggregate: groupBy=[[all_nations.o_year]], aggr=[[sum(CASE WHEN all_nations.nation = Utf8("IRAQ") THEN all_nations.volume ELSE Decimal128(Some(0),35,4) END) AS sum(CASE WHEN all_nations.nation = Utf8("IRAQ") THEN 
all_nations.volume ELSE Int64(0) END), sum(all_nations.volume)]] + SubqueryAlias: all_nations + Projection: date_part(Utf8("YEAR"), orders.o_orderdate) AS o_year, lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - lineitem.l_discount) AS volume, n2.n_name AS nation + Inner Join: n1.n_regionkey = region.r_regionkey + Projection: lineitem.l_extendedprice, lineitem.l_discount, orders.o_orderdate, n1.n_regionkey, n2.n_name + Inner Join: supplier.s_nationkey = n2.n_nationkey + Projection: lineitem.l_extendedprice, lineitem.l_discount, supplier.s_nationkey, orders.o_orderdate, n1.n_regionkey + Inner Join: customer.c_nationkey = n1.n_nationkey + Projection: lineitem.l_extendedprice, lineitem.l_discount, supplier.s_nationkey, orders.o_orderdate, customer.c_nationkey + Inner Join: orders.o_custkey = customer.c_custkey + Projection: lineitem.l_extendedprice, lineitem.l_discount, supplier.s_nationkey, orders.o_custkey, orders.o_orderdate + Inner Join: lineitem.l_orderkey = orders.o_orderkey + Projection: lineitem.l_orderkey, lineitem.l_extendedprice, lineitem.l_discount, supplier.s_nationkey + Inner Join: lineitem.l_suppkey = supplier.s_suppkey + Projection: lineitem.l_orderkey, lineitem.l_suppkey, lineitem.l_extendedprice, lineitem.l_discount + Inner Join: part.p_partkey = lineitem.l_partkey + Projection: part.p_partkey + Filter: part.p_type = Utf8("LARGE PLATED STEEL") + TableScan: part projection=[p_partkey, p_type], partial_filters=[part.p_type = Utf8("LARGE PLATED STEEL")] + TableScan: lineitem projection=[l_orderkey, l_partkey, l_suppkey, l_extendedprice, l_discount] + TableScan: supplier projection=[s_suppkey, s_nationkey] + Filter: orders.o_orderdate >= Date32("1995-01-01") AND orders.o_orderdate <= Date32("1996-12-31") + TableScan: orders projection=[o_orderkey, o_custkey, o_orderdate], partial_filters=[orders.o_orderdate >= Date32("1995-01-01"), orders.o_orderdate <= Date32("1996-12-31")] + TableScan: customer projection=[c_custkey, c_nationkey] + SubqueryAlias: 
n1 + TableScan: nation projection=[n_nationkey, n_regionkey] + SubqueryAlias: n2 + TableScan: nation projection=[n_nationkey, n_name] + Projection: region.r_regionkey + Filter: region.r_name = Utf8("MIDDLE EAST") + TableScan: region projection=[r_regionkey, r_name], partial_filters=[region.r_name = Utf8("MIDDLE EAST")] + +DataFusion Physical Plan +======================== + +SortExec: expr=[o_year@0 ASC NULLS LAST], preserve_partitioning=[false] + ProjectionExec: expr=[o_year@0 as o_year, sum(CASE WHEN all_nations.nation = Utf8("IRAQ") THEN all_nations.volume ELSE Int64(0) END)@1 / sum(all_nations.volume)@2 as mkt_share] + AggregateExec: mode=Single, gby=[o_year@0 as o_year], aggr=[sum(CASE WHEN all_nations.nation = Utf8("IRAQ") THEN all_nations.volume ELSE Int64(0) END), sum(all_nations.volume)] + ProjectionExec: expr=[date_part(YEAR, o_orderdate@2) as o_year, l_extendedprice@0 * (Some(1),20,0 - l_discount@1) as volume, n_name@3 as nation] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(r_regionkey@0, n_regionkey@3)], projection=[l_extendedprice@1, l_discount@2, o_orderdate@3, n_name@5] + ProjectionExec: expr=[r_regionkey@0 as r_regionkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: r_name@1 = MIDDLE EAST + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = MIDDLE EAST, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= MIDDLE EAST AND MIDDLE EAST <= r_name_max@1 END, required_guarantees=[r_name in (MIDDLE EAST)] + ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, o_orderdate@3 as o_orderdate, n_regionkey@4 as n_regionkey, n_name@0 as n_name] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[n_name@1, 
l_extendedprice@2, l_discount@3, o_orderdate@5, n_regionkey@6] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name] + ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@3 as s_nationkey, o_orderdate@4 as o_orderdate, n_regionkey@0 as n_regionkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, c_nationkey@4)], projection=[n_regionkey@1, l_extendedprice@2, l_discount@3, s_nationkey@4, o_orderdate@5] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_regionkey] + ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@3 as s_nationkey, o_orderdate@4 as o_orderdate, c_nationkey@0 as c_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(c_custkey@0, o_custkey@3)], projection=[c_nationkey@1, l_extendedprice@2, l_discount@3, s_nationkey@4, o_orderdate@6] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13210681]]}, projection=[c_custkey, c_nationkey] + ProjectionExec: expr=[l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@4 as s_nationkey, o_custkey@0 as o_custkey, o_orderdate@1 as o_orderdate] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_custkey@1, o_orderdate@2, l_extendedprice@4, l_discount@5, s_nationkey@6] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: o_orderdate@2 >= 1995-01-01 AND o_orderdate@2 <= 1996-12-31 + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54533415]]}, projection=[o_orderkey, o_custkey, 
o_orderdate], predicate=o_orderdate@4 >= 1995-01-01 AND o_orderdate@4 <= 1996-12-31, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1995-01-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 <= 1996-12-31 END, required_guarantees=[] + ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@0 as s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1)], projection=[s_nationkey@1, l_orderkey@2, l_extendedprice@4, l_discount@5] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, l_partkey@1)], projection=[l_orderkey@1, l_suppkey@3, l_extendedprice@4, l_discount@5] + ProjectionExec: expr=[p_partkey@0 as p_partkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: p_type@1 = LARGE PLATED STEEL + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_type], predicate=p_type@4 = LARGE PLATED STEEL, pruning_predicate=CASE WHEN p_type_null_count@2 = p_type_row_count@3 THEN false ELSE p_type_min@0 <= LARGE PLATED STEEL AND LARGE PLATED STEEL <= p_type_max@1 END, required_guarantees=[p_type in (LARGE PLATED STEEL)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_orderkey, l_partkey, l_suppkey, l_extendedprice, l_discount] + +RaySQL Plan +=========== + +Query Stage #0 (1 -> 1): +SortExec: expr=[o_year@0 ASC NULLS LAST], preserve_partitioning=[false] + ProjectionExec: expr=[o_year@0 as o_year, sum(CASE WHEN 
all_nations.nation = Utf8("IRAQ") THEN all_nations.volume ELSE Int64(0) END)@1 / sum(all_nations.volume)@2 as mkt_share] + AggregateExec: mode=Single, gby=[o_year@0 as o_year], aggr=[sum(CASE WHEN all_nations.nation = Utf8("IRAQ") THEN all_nations.volume ELSE Int64(0) END), sum(all_nations.volume)] + ProjectionExec: expr=[date_part(YEAR, o_orderdate@2) as o_year, l_extendedprice@0 * (Some(1),20,0 - l_discount@1) as volume, n_name@3 as nation] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(r_regionkey@0, n_regionkey@3)], projection=[l_extendedprice@1, l_discount@2, o_orderdate@3, n_name@5] + ProjectionExec: expr=[r_regionkey@0 as r_regionkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: r_name@1 = MIDDLE EAST + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = MIDDLE EAST, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= MIDDLE EAST AND MIDDLE EAST <= r_name_max@1 END, required_guarantees=[r_name in (MIDDLE EAST)] + ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, o_orderdate@3 as o_orderdate, n_regionkey@4 as n_regionkey, n_name@0 as n_name] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[n_name@1, l_extendedprice@2, l_discount@3, o_orderdate@5, n_regionkey@6] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name] + ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@3 as s_nationkey, o_orderdate@4 as o_orderdate, n_regionkey@0 as n_regionkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, 
c_nationkey@4)], projection=[n_regionkey@1, l_extendedprice@2, l_discount@3, s_nationkey@4, o_orderdate@5] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_regionkey] + ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@3 as s_nationkey, o_orderdate@4 as o_orderdate, c_nationkey@0 as c_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(c_custkey@0, o_custkey@3)], projection=[c_nationkey@1, l_extendedprice@2, l_discount@3, s_nationkey@4, o_orderdate@6] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13210681]]}, projection=[c_custkey, c_nationkey] + ProjectionExec: expr=[l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@4 as s_nationkey, o_custkey@0 as o_custkey, o_orderdate@1 as o_orderdate] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_custkey@1, o_orderdate@2, l_extendedprice@4, l_discount@5, s_nationkey@6] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: o_orderdate@2 >= 1995-01-01 AND o_orderdate@2 <= 1996-12-31 + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54533415]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1995-01-01 AND o_orderdate@4 <= 1996-12-31, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1995-01-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 <= 1996-12-31 END, required_guarantees=[] + ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@0 as s_nationkey] + 
CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1)], projection=[s_nationkey@1, l_orderkey@2, l_extendedprice@4, l_discount@5] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, l_partkey@1)], projection=[l_orderkey@1, l_suppkey@3, l_extendedprice@4, l_discount@5] + ProjectionExec: expr=[p_partkey@0 as p_partkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: p_type@1 = LARGE PLATED STEEL + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_type], predicate=p_type@4 = LARGE PLATED STEEL, pruning_predicate=CASE WHEN p_type_null_count@2 = p_type_row_count@3 THEN false ELSE p_type_min@0 <= LARGE PLATED STEEL AND LARGE PLATED STEEL <= p_type_max@1 END, required_guarantees=[p_type in (LARGE PLATED STEEL)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_orderkey, l_partkey, l_suppkey, l_extendedprice, l_discount] + diff --git a/testdata/expected-plans/q9.txt b/testdata/expected-plans/q9.txt new file mode 100644 index 0000000..886476a --- /dev/null +++ b/testdata/expected-plans/q9.txt @@ -0,0 +1,87 @@ +DataFusion Logical Plan +======================= + +Sort: profit.nation ASC NULLS LAST, profit.o_year DESC NULLS FIRST + Projection: profit.nation, profit.o_year, sum(profit.amount) AS sum_profit + Aggregate: groupBy=[[profit.nation, profit.o_year]], aggr=[[sum(profit.amount)]] + SubqueryAlias: profit + Projection: nation.n_name AS nation, date_part(Utf8("YEAR"), orders.o_orderdate) AS o_year, lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - lineitem.l_discount) - partsupp.ps_supplycost * 
lineitem.l_quantity AS amount + Inner Join: supplier.s_nationkey = nation.n_nationkey + Projection: lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount, supplier.s_nationkey, partsupp.ps_supplycost, orders.o_orderdate + Inner Join: lineitem.l_orderkey = orders.o_orderkey + Projection: lineitem.l_orderkey, lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount, supplier.s_nationkey, partsupp.ps_supplycost + Inner Join: lineitem.l_suppkey = partsupp.ps_suppkey, lineitem.l_partkey = partsupp.ps_partkey + Projection: lineitem.l_orderkey, lineitem.l_partkey, lineitem.l_suppkey, lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount, supplier.s_nationkey + Inner Join: lineitem.l_suppkey = supplier.s_suppkey + Projection: lineitem.l_orderkey, lineitem.l_partkey, lineitem.l_suppkey, lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount + Inner Join: part.p_partkey = lineitem.l_partkey + Projection: part.p_partkey + Filter: part.p_name LIKE Utf8("%moccasin%") + TableScan: part projection=[p_partkey, p_name], partial_filters=[part.p_name LIKE Utf8("%moccasin%")] + TableScan: lineitem projection=[l_orderkey, l_partkey, l_suppkey, l_quantity, l_extendedprice, l_discount] + TableScan: supplier projection=[s_suppkey, s_nationkey] + TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_supplycost] + TableScan: orders projection=[o_orderkey, o_orderdate] + TableScan: nation projection=[n_nationkey, n_name] + +DataFusion Physical Plan +======================== + +SortExec: expr=[nation@0 ASC NULLS LAST,o_year@1 DESC], preserve_partitioning=[false] + ProjectionExec: expr=[nation@0 as nation, o_year@1 as o_year, sum(profit.amount)@2 as sum_profit] + AggregateExec: mode=Single, gby=[nation@0 as nation, o_year@1 as o_year], aggr=[sum(profit.amount)] + ProjectionExec: expr=[n_name@0 as nation, date_part(YEAR, o_orderdate@5) as o_year, l_extendedprice@2 * (Some(1),20,0 - l_discount@3) - ps_supplycost@4 * l_quantity@1 as amount] + 
CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@3)], projection=[n_name@1, l_quantity@2, l_extendedprice@3, l_discount@4, ps_supplycost@6, o_orderdate@7] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name] + ProjectionExec: expr=[l_quantity@1 as l_quantity, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@4 as s_nationkey, ps_supplycost@5 as ps_supplycost, o_orderdate@0 as o_orderdate] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_orderdate@1, l_quantity@3, l_extendedprice@4, l_discount@5, s_nationkey@6, ps_supplycost@7] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54533415]]}, projection=[o_orderkey, o_orderdate] + ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_quantity@2 as l_quantity, l_extendedprice@3 as l_extendedprice, l_discount@4 as l_discount, s_nationkey@5 as s_nationkey, ps_supplycost@0 as ps_supplycost] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(ps_suppkey@1, l_suppkey@2), (ps_partkey@0, l_partkey@1)], projection=[ps_supplycost@2, l_orderkey@3, l_quantity@6, l_extendedprice@7, l_discount@8, s_nationkey@9] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet:0..41093580]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] + ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_partkey@2 as l_partkey, l_suppkey@3 as l_suppkey, l_quantity@4 as l_quantity, l_extendedprice@5 as l_extendedprice, l_discount@6 as l_discount, s_nationkey@0 as s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, l_suppkey@2)], 
projection=[s_nationkey@1, l_orderkey@2, l_partkey@3, l_suppkey@4, l_quantity@5, l_extendedprice@6, l_discount@7] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, l_partkey@1)], projection=[l_orderkey@1, l_partkey@2, l_suppkey@3, l_quantity@4, l_extendedprice@5, l_discount@6] + ProjectionExec: expr=[p_partkey@0 as p_partkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: p_name@1 LIKE %moccasin% + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_name], predicate=p_name@1 LIKE %moccasin% + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_orderkey, l_partkey, l_suppkey, l_quantity, l_extendedprice, l_discount] + +RaySQL Plan +=========== + +Query Stage #0 (1 -> 1): +SortExec: expr=[nation@0 ASC NULLS LAST,o_year@1 DESC], preserve_partitioning=[false] + ProjectionExec: expr=[nation@0 as nation, o_year@1 as o_year, sum(profit.amount)@2 as sum_profit] + AggregateExec: mode=Single, gby=[nation@0 as nation, o_year@1 as o_year], aggr=[sum(profit.amount)] + ProjectionExec: expr=[n_name@0 as nation, date_part(YEAR, o_orderdate@5) as o_year, l_extendedprice@2 * (Some(1),20,0 - l_discount@3) - ps_supplycost@4 * l_quantity@1 as amount] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@3)], projection=[n_name@1, l_quantity@2, l_extendedprice@3, l_discount@4, ps_supplycost@6, o_orderdate@7] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name] + ProjectionExec: expr=[l_quantity@1 as l_quantity, l_extendedprice@2 as 
l_extendedprice, l_discount@3 as l_discount, s_nationkey@4 as s_nationkey, ps_supplycost@5 as ps_supplycost, o_orderdate@0 as o_orderdate] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_orderdate@1, l_quantity@3, l_extendedprice@4, l_discount@5, s_nationkey@6, ps_supplycost@7] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54533415]]}, projection=[o_orderkey, o_orderdate] + ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_quantity@2 as l_quantity, l_extendedprice@3 as l_extendedprice, l_discount@4 as l_discount, s_nationkey@5 as s_nationkey, ps_supplycost@0 as ps_supplycost] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(ps_suppkey@1, l_suppkey@2), (ps_partkey@0, l_partkey@1)], projection=[ps_supplycost@2, l_orderkey@3, l_quantity@6, l_extendedprice@7, l_discount@8, s_nationkey@9] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet:0..41093580]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] + ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_partkey@2 as l_partkey, l_suppkey@3 as l_suppkey, l_quantity@4 as l_quantity, l_extendedprice@5 as l_extendedprice, l_discount@6 as l_discount, s_nationkey@0 as s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, l_suppkey@2)], projection=[s_nationkey@1, l_orderkey@2, l_partkey@3, l_suppkey@4, l_quantity@5, l_extendedprice@6, l_discount@7] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, l_partkey@1)], projection=[l_orderkey@1, l_partkey@2, l_suppkey@3, 
l_quantity@4, l_extendedprice@5, l_discount@6] + ProjectionExec: expr=[p_partkey@0 as p_partkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: p_name@1 LIKE %moccasin% + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_name], predicate=p_name@1 LIKE %moccasin% + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_orderkey, l_partkey, l_suppkey, l_quantity, l_extendedprice, l_discount] + From 2ee71cf1bed838c9acc635f879e43ed5450fdcce Mon Sep 17 00:00:00 2001 From: Edmondo Porcu Date: Thu, 3 Oct 2024 16:39:55 -0400 Subject: [PATCH 06/14] Fixing tests --- .github/workflows/rust.yml | 4 ++-- .gitignore | 1 + scripts/gen-test-data.sh | 22 ++++++++++++++++++-- testdata/expected-plans/q1.txt | 4 ++-- testdata/expected-plans/q10.txt | 16 +++++++-------- testdata/expected-plans/q11.txt | 24 +++++++++++----------- testdata/expected-plans/q12.txt | 8 ++++---- testdata/expected-plans/q13.txt | 8 ++++---- testdata/expected-plans/q14.txt | 8 ++++---- testdata/expected-plans/q16.txt | 12 +++++------ testdata/expected-plans/q17.txt | 12 +++++------ testdata/expected-plans/q18.txt | 16 +++++++-------- testdata/expected-plans/q19.txt | 8 ++++---- testdata/expected-plans/q2.txt | 36 ++++++++++++++++----------------- testdata/expected-plans/q20.txt | 20 +++++++++--------- testdata/expected-plans/q21.txt | 24 +++++++++++----------- testdata/expected-plans/q22.txt | 12 +++++------ testdata/expected-plans/q3.txt | 12 +++++------ testdata/expected-plans/q4.txt | 8 ++++---- testdata/expected-plans/q5.txt | 24 +++++++++++----------- testdata/expected-plans/q6.txt | 4 ++-- testdata/expected-plans/q7.txt | 24 +++++++++++----------- testdata/expected-plans/q8.txt | 32 ++++++++++++++--------------- testdata/expected-plans/q9.txt | 24 +++++++++++----------- tpch/tpchgen.py | 4 ++-- 25 files changed, 193 insertions(+), 
174 deletions(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 60c2bb8..5f355f2 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -7,8 +7,8 @@ on: env: CARGO_TERM_COLOR: always PYTHON_VERSION: 3.9 - TPCH_SAMPLING_RATE: "1" # (1/100) - TPCH_TEST_PARTITIONS: "1" + TPCH_SAMPLING_RATE: "0.1" + TPCH_TEST_PARTITIONS: "2" TPCH_DATA_PATH: "data" jobs: diff --git a/.gitignore b/.gitignore index 1a7e859..fdbe035 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 @@ venv *.log results-sf* data +tpch/tpch-dbgen diff --git a/scripts/gen-test-data.sh b/scripts/gen-test-data.sh index 23772f2..4c7560a 100755 --- a/scripts/gen-test-data.sh +++ b/scripts/gen-test-data.sh @@ -1,6 +1,24 @@ #!/bin/bash set -e +# Create necessary directories mkdir -p data -python -m tpch.tpchgen generate --scale-factor "$TPCH_SAMPLING_RATE" --partitions "$TPCH_TEST_PARTITIONS" -python -m tpch.tpchgen convert --partitions "$TPCH_TEST_PARTITIONS" +mkdir -p test_files/tpch/data + +# Check if the data folder is empty +if [ -z "$(ls -A data)" ]; then + echo "Data folder is empty. Cloning repository..." + git clone https://github.com/databricks/tpch-dbgen.git tpch/tpch-dbgen + cd tpch/tpch-dbgen + make + # consistent with DataFusion test strategy + ./dbgen -f -s "$TPCH_SAMPLING_RATE" + pwd + ls + mv ./*.tbl ../../data + cd ../../ + pwd + python -m tpch.tpchgen convert --partitions "$TPCH_TEST_PARTITIONS" +else + echo "Data folder is not empty. Skipping cloning and data generation." 
+fi diff --git a/testdata/expected-plans/q1.txt b/testdata/expected-plans/q1.txt index c5b6928..aaf26ad 100644 --- a/testdata/expected-plans/q1.txt +++ b/testdata/expected-plans/q1.txt @@ -17,7 +17,7 @@ SortExec: expr=[l_returnflag@0 ASC NULLS LAST,l_linestatus@1 ASC NULLS LAST], pr ProjectionExec: expr=[l_extendedprice@1 * (Some(1),20,0 - l_discount@2) as __common_expr_2, l_quantity@0 as l_quantity, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, l_tax@3 as l_tax, l_returnflag@4 as l_returnflag, l_linestatus@5 as l_linestatus] CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_shipdate@6 <= 1998-09-24 - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate], predicate=l_shipdate@10 <= 1998-09-24, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@0 <= 1998-09-24 END, required_guarantees=[] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate], predicate=l_shipdate@10 <= 1998-09-24, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@0 <= 1998-09-24 END, required_guarantees=[] RaySQL Plan =========== @@ -29,5 +29,5 @@ SortExec: expr=[l_returnflag@0 ASC NULLS LAST,l_linestatus@1 ASC NULLS LAST], pr ProjectionExec: expr=[l_extendedprice@1 * (Some(1),20,0 - l_discount@2) as __common_expr_2, l_quantity@0 as l_quantity, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, l_tax@3 as l_tax, l_returnflag@4 as l_returnflag, l_linestatus@5 as l_linestatus] CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_shipdate@6 <= 1998-09-24 - ParquetExec: file_groups={1 group: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate], predicate=l_shipdate@10 <= 1998-09-24, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@0 <= 1998-09-24 END, required_guarantees=[] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate], predicate=l_shipdate@10 <= 1998-09-24, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@0 <= 1998-09-24 END, required_guarantees=[] diff --git a/testdata/expected-plans/q10.txt b/testdata/expected-plans/q10.txt index 0e84a0d..417763d 100644 --- a/testdata/expected-plans/q10.txt +++ b/testdata/expected-plans/q10.txt @@ -29,20 +29,20 @@ SortExec: TopK(fetch=20), expr=[revenue@2 DESC], preserve_partitioning=[false] ProjectionExec: expr=[c_custkey@1 as c_custkey, c_name@2 as c_name, c_address@3 as c_address, c_phone@4 as c_phone, c_acctbal@5 as c_acctbal, c_comment@6 as c_comment, l_extendedprice@7 as l_extendedprice, l_discount@8 as l_discount, n_name@0 as n_name] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, c_nationkey@3)], projection=[n_name@1, c_custkey@2, c_name@3, c_address@4, c_phone@6, c_acctbal@7, c_comment@8, l_extendedprice@9, l_discount@10] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(o_orderkey@7, l_orderkey@0)], 
projection=[c_custkey@0, c_name@1, c_address@2, c_nationkey@3, c_phone@4, c_acctbal@5, c_comment@6, l_extendedprice@9, l_discount@10] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, c_name@1, c_address@2, c_nationkey@3, c_phone@4, c_acctbal@5, c_comment@6, o_orderkey@7] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13210681]]}, projection=[c_custkey, c_name, c_address, c_nationkey, c_phone, c_acctbal, c_comment] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/customer.parquet:0..13212491]]}, projection=[c_custkey, c_name, c_address, c_nationkey, c_phone, c_acctbal, c_comment] ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: o_orderdate@2 >= 1993-07-01 AND o_orderdate@2 < 1993-10-01 - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54533415]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1993-07-01 AND o_orderdate@4 < 1993-10-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1993-07-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1993-10-01 END, required_guarantees=[] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1993-07-01 AND o_orderdate@4 < 1993-10-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1993-07-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1993-10-01 END, required_guarantees=[] 
ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_returnflag@3 = R - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_orderkey, l_extendedprice, l_discount, l_returnflag], predicate=l_returnflag@8 = R, pruning_predicate=CASE WHEN l_returnflag_null_count@2 = l_returnflag_row_count@3 THEN false ELSE l_returnflag_min@0 <= R AND R <= l_returnflag_max@1 END, required_guarantees=[l_returnflag in (R)] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_extendedprice, l_discount, l_returnflag], predicate=l_returnflag@8 = R, pruning_predicate=CASE WHEN l_returnflag_null_count@2 = l_returnflag_row_count@3 THEN false ELSE l_returnflag_min@0 <= R AND R <= l_returnflag_max@1 END, required_guarantees=[l_returnflag in (R)] RaySQL Plan =========== @@ -54,18 +54,18 @@ SortExec: TopK(fetch=20), expr=[revenue@2 DESC], preserve_partitioning=[false] ProjectionExec: expr=[c_custkey@1 as c_custkey, c_name@2 as c_name, c_address@3 as c_address, c_phone@4 as c_phone, c_acctbal@5 as c_acctbal, c_comment@6 as c_comment, l_extendedprice@7 as l_extendedprice, l_discount@8 as l_discount, n_name@0 as n_name] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, c_nationkey@3)], projection=[n_name@1, c_custkey@2, c_name@3, c_address@4, c_phone@6, c_acctbal@7, c_comment@8, l_extendedprice@9, l_discount@10] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: 
mode=CollectLeft, join_type=Inner, on=[(o_orderkey@7, l_orderkey@0)], projection=[c_custkey@0, c_name@1, c_address@2, c_nationkey@3, c_phone@4, c_acctbal@5, c_comment@6, l_extendedprice@9, l_discount@10] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, c_name@1, c_address@2, c_nationkey@3, c_phone@4, c_acctbal@5, c_comment@6, o_orderkey@7] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13210681]]}, projection=[c_custkey, c_name, c_address, c_nationkey, c_phone, c_acctbal, c_comment] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/customer.parquet:0..13212491]]}, projection=[c_custkey, c_name, c_address, c_nationkey, c_phone, c_acctbal, c_comment] ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: o_orderdate@2 >= 1993-07-01 AND o_orderdate@2 < 1993-10-01 - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54533415]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1993-07-01 AND o_orderdate@4 < 1993-10-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1993-07-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1993-10-01 END, required_guarantees=[] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1993-07-01 AND o_orderdate@4 < 1993-10-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1993-07-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN 
false ELSE o_orderdate_min@3 < 1993-10-01 END, required_guarantees=[] ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_returnflag@3 = R - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_orderkey, l_extendedprice, l_discount, l_returnflag], predicate=l_returnflag@8 = R, pruning_predicate=CASE WHEN l_returnflag_null_count@2 = l_returnflag_row_count@3 THEN false ELSE l_returnflag_min@0 <= R AND R <= l_returnflag_max@1 END, required_guarantees=[l_returnflag in (R)] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_extendedprice, l_discount, l_returnflag], predicate=l_returnflag@8 = R, pruning_predicate=CASE WHEN l_returnflag_null_count@2 = l_returnflag_row_count@3 THEN false ELSE l_returnflag_min@0 <= R AND R <= l_returnflag_max@1 END, required_guarantees=[l_returnflag in (R)] diff --git a/testdata/expected-plans/q11.txt b/testdata/expected-plans/q11.txt index 8be5c18..abfb957 100644 --- a/testdata/expected-plans/q11.txt +++ b/testdata/expected-plans/q11.txt @@ -40,24 +40,24 @@ SortExec: expr=[value@1 DESC], preserve_partitioning=[false] ProjectionExec: expr=[n_nationkey@0 as n_nationkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: n_name@1 = ALGERIA - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ALGERIA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ALGERIA AND ALGERIA <= n_name_max@1 END, required_guarantees=[n_name in (ALGERIA)] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 
= ALGERIA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ALGERIA AND ALGERIA <= n_name_max@1 END, required_guarantees=[n_name in (ALGERIA)] ProjectionExec: expr=[ps_availqty@1 as ps_availqty, ps_supplycost@2 as ps_supplycost, s_nationkey@0 as s_nationkey] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@0)], projection=[s_nationkey@1, ps_availqty@3, ps_supplycost@4] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet:0..41093580]]}, projection=[ps_suppkey, ps_availqty, ps_supplycost] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/partsupp.parquet:0..41092331]]}, projection=[ps_suppkey, ps_availqty, ps_supplycost] AggregateExec: mode=Single, gby=[ps_partkey@0 as ps_partkey], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@3)], projection=[ps_partkey@1, ps_availqty@2, ps_supplycost@3] ProjectionExec: expr=[n_nationkey@0 as n_nationkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: n_name@1 = ALGERIA - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ALGERIA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ALGERIA AND ALGERIA <= n_name_max@1 END, required_guarantees=[n_name in (ALGERIA)] + ParquetExec: file_groups={1 group: 
[[home/edmondo/Development/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ALGERIA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ALGERIA AND ALGERIA <= n_name_max@1 END, required_guarantees=[n_name in (ALGERIA)] ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_availqty@2 as ps_availqty, ps_supplycost@3 as ps_supplycost, s_nationkey@0 as s_nationkey] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@1)], projection=[s_nationkey@1, ps_partkey@2, ps_availqty@4, ps_supplycost@5] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet:0..41093580]]}, projection=[ps_partkey, ps_suppkey, ps_availqty, ps_supplycost] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/partsupp.parquet:0..41092331]]}, projection=[ps_partkey, ps_suppkey, ps_availqty, ps_supplycost] RaySQL Plan =========== @@ -73,22 +73,22 @@ SortExec: expr=[value@1 DESC], preserve_partitioning=[false] ProjectionExec: expr=[n_nationkey@0 as n_nationkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: n_name@1 = ALGERIA - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ALGERIA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ALGERIA AND ALGERIA <= n_name_max@1 END, required_guarantees=[n_name in (ALGERIA)] + ParquetExec: file_groups={1 group: 
[[home/edmondo/Development/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ALGERIA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ALGERIA AND ALGERIA <= n_name_max@1 END, required_guarantees=[n_name in (ALGERIA)] ProjectionExec: expr=[ps_availqty@1 as ps_availqty, ps_supplycost@2 as ps_supplycost, s_nationkey@0 as s_nationkey] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@0)], projection=[s_nationkey@1, ps_availqty@3, ps_supplycost@4] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet:0..41093580]]}, projection=[ps_suppkey, ps_availqty, ps_supplycost] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/partsupp.parquet:0..41092331]]}, projection=[ps_suppkey, ps_availqty, ps_supplycost] AggregateExec: mode=Single, gby=[ps_partkey@0 as ps_partkey], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@3)], projection=[ps_partkey@1, ps_availqty@2, ps_supplycost@3] ProjectionExec: expr=[n_nationkey@0 as n_nationkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: n_name@1 = ALGERIA - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ALGERIA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ALGERIA AND ALGERIA <= n_name_max@1 
END, required_guarantees=[n_name in (ALGERIA)] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ALGERIA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ALGERIA AND ALGERIA <= n_name_max@1 END, required_guarantees=[n_name in (ALGERIA)] ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_availqty@2 as ps_availqty, ps_supplycost@3 as ps_supplycost, s_nationkey@0 as s_nationkey] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@1)], projection=[s_nationkey@1, ps_partkey@2, ps_availqty@4, ps_supplycost@5] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet:0..41093580]]}, projection=[ps_partkey, ps_suppkey, ps_availqty, ps_supplycost] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/partsupp.parquet:0..41092331]]}, projection=[ps_partkey, ps_suppkey, ps_availqty, ps_supplycost] diff --git a/testdata/expected-plans/q12.txt b/testdata/expected-plans/q12.txt index 7681fb6..563495c 100644 --- a/testdata/expected-plans/q12.txt +++ b/testdata/expected-plans/q12.txt @@ -23,8 +23,8 @@ SortExec: expr=[l_shipmode@0 ASC NULLS LAST], preserve_partitioning=[false] ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_shipmode@4 as l_shipmode] CoalesceBatchesExec: target_batch_size=8192 FilterExec: (l_shipmode@4 = FOB OR l_shipmode@4 = SHIP) AND l_receiptdate@3 > l_commitdate@2 AND l_shipdate@1 < l_commitdate@2 AND l_receiptdate@3 >= 1995-01-01 AND l_receiptdate@3 < 1996-01-01 - 
ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_orderkey, l_shipdate, l_commitdate, l_receiptdate, l_shipmode], predicate=(l_shipmode@14 = FOB OR l_shipmode@14 = SHIP) AND l_receiptdate@12 > l_commitdate@11 AND l_shipdate@10 < l_commitdate@11 AND l_receiptdate@12 >= 1995-01-01 AND l_receiptdate@12 < 1996-01-01, pruning_predicate=(CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= FOB AND FOB <= l_shipmode_max@1 END OR CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= SHIP AND SHIP <= l_shipmode_max@1 END) AND CASE WHEN l_receiptdate_null_count@5 = l_receiptdate_row_count@6 THEN false ELSE l_receiptdate_max@4 >= 1995-01-01 END AND CASE WHEN l_receiptdate_null_count@5 = l_receiptdate_row_count@6 THEN false ELSE l_receiptdate_min@7 < 1996-01-01 END, required_guarantees=[l_shipmode in (FOB, SHIP)] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54533415]]}, projection=[o_orderkey, o_orderpriority] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_shipdate, l_commitdate, l_receiptdate, l_shipmode], predicate=(l_shipmode@14 = FOB OR l_shipmode@14 = SHIP) AND l_receiptdate@12 > l_commitdate@11 AND l_shipdate@10 < l_commitdate@11 AND l_receiptdate@12 >= 1995-01-01 AND l_receiptdate@12 < 1996-01-01, pruning_predicate=(CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= FOB AND FOB <= l_shipmode_max@1 END OR CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= SHIP AND SHIP <= l_shipmode_max@1 END) AND CASE WHEN l_receiptdate_null_count@5 = l_receiptdate_row_count@6 THEN false ELSE l_receiptdate_max@4 >= 1995-01-01 END AND CASE WHEN l_receiptdate_null_count@5 = 
l_receiptdate_row_count@6 THEN false ELSE l_receiptdate_min@7 < 1996-01-01 END, required_guarantees=[l_shipmode in (SHIP, FOB)] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_orderpriority] RaySQL Plan =========== @@ -39,6 +39,6 @@ SortExec: expr=[l_shipmode@0 ASC NULLS LAST], preserve_partitioning=[false] ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_shipmode@4 as l_shipmode] CoalesceBatchesExec: target_batch_size=8192 FilterExec: (l_shipmode@4 = FOB OR l_shipmode@4 = SHIP) AND l_receiptdate@3 > l_commitdate@2 AND l_shipdate@1 < l_commitdate@2 AND l_receiptdate@3 >= 1995-01-01 AND l_receiptdate@3 < 1996-01-01 - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_orderkey, l_shipdate, l_commitdate, l_receiptdate, l_shipmode], predicate=(l_shipmode@14 = FOB OR l_shipmode@14 = SHIP) AND l_receiptdate@12 > l_commitdate@11 AND l_shipdate@10 < l_commitdate@11 AND l_receiptdate@12 >= 1995-01-01 AND l_receiptdate@12 < 1996-01-01, pruning_predicate=(CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= FOB AND FOB <= l_shipmode_max@1 END OR CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= SHIP AND SHIP <= l_shipmode_max@1 END) AND CASE WHEN l_receiptdate_null_count@5 = l_receiptdate_row_count@6 THEN false ELSE l_receiptdate_max@4 >= 1995-01-01 END AND CASE WHEN l_receiptdate_null_count@5 = l_receiptdate_row_count@6 THEN false ELSE l_receiptdate_min@7 < 1996-01-01 END, required_guarantees=[l_shipmode in (FOB, SHIP)] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54533415]]}, projection=[o_orderkey, o_orderpriority] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, 
projection=[l_orderkey, l_shipdate, l_commitdate, l_receiptdate, l_shipmode], predicate=(l_shipmode@14 = FOB OR l_shipmode@14 = SHIP) AND l_receiptdate@12 > l_commitdate@11 AND l_shipdate@10 < l_commitdate@11 AND l_receiptdate@12 >= 1995-01-01 AND l_receiptdate@12 < 1996-01-01, pruning_predicate=(CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= FOB AND FOB <= l_shipmode_max@1 END OR CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= SHIP AND SHIP <= l_shipmode_max@1 END) AND CASE WHEN l_receiptdate_null_count@5 = l_receiptdate_row_count@6 THEN false ELSE l_receiptdate_max@4 >= 1995-01-01 END AND CASE WHEN l_receiptdate_null_count@5 = l_receiptdate_row_count@6 THEN false ELSE l_receiptdate_min@7 < 1996-01-01 END, required_guarantees=[l_shipmode in (SHIP, FOB)] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_orderpriority] diff --git a/testdata/expected-plans/q13.txt b/testdata/expected-plans/q13.txt index a49b7d3..446b9b5 100644 --- a/testdata/expected-plans/q13.txt +++ b/testdata/expected-plans/q13.txt @@ -24,11 +24,11 @@ SortExec: expr=[custdist@1 DESC,c_count@0 DESC], preserve_partitioning=[false] AggregateExec: mode=Single, gby=[c_custkey@0 as c_custkey], aggr=[count(orders.o_orderkey)] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Left, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, o_orderkey@1] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13210681]]}, projection=[c_custkey] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/customer.parquet:0..13212491]]}, projection=[c_custkey] ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: o_comment@2 NOT LIKE 
%express%requests% - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54533415]]}, projection=[o_orderkey, o_custkey, o_comment], predicate=o_comment@8 NOT LIKE %express%requests% + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_custkey, o_comment], predicate=o_comment@8 NOT LIKE %express%requests% RaySQL Plan =========== @@ -41,9 +41,9 @@ SortExec: expr=[custdist@1 DESC,c_count@0 DESC], preserve_partitioning=[false] AggregateExec: mode=Single, gby=[c_custkey@0 as c_custkey], aggr=[count(orders.o_orderkey)] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Left, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, o_orderkey@1] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13210681]]}, projection=[c_custkey] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/customer.parquet:0..13212491]]}, projection=[c_custkey] ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: o_comment@2 NOT LIKE %express%requests% - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54533415]]}, projection=[o_orderkey, o_custkey, o_comment], predicate=o_comment@8 NOT LIKE %express%requests% + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_custkey, o_comment], predicate=o_comment@8 NOT LIKE %express%requests% diff --git a/testdata/expected-plans/q14.txt b/testdata/expected-plans/q14.txt index a7de0f0..b1f39b6 100644 --- a/testdata/expected-plans/q14.txt +++ b/testdata/expected-plans/q14.txt @@ -18,11 +18,11 @@ ProjectionExec: expr=[100 * CAST(sum(CASE WHEN part.p_type LIKE 
Utf8("PROMO%") T ProjectionExec: expr=[l_extendedprice@1 * (Some(1),20,0 - l_discount@2) as __common_expr_2, p_type@0 as p_type] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, l_partkey@0)], projection=[p_type@1, l_extendedprice@3, l_discount@4] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_type] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_type] ProjectionExec: expr=[l_partkey@0 as l_partkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_shipdate@3 >= 1995-02-01 AND l_shipdate@3 < 1995-03-01 - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_partkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1995-02-01 AND l_shipdate@10 < 1995-03-01, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1995-02-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1995-03-01 END, required_guarantees=[] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_partkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1995-02-01 AND l_shipdate@10 < 1995-03-01, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1995-02-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1995-03-01 END, required_guarantees=[] RaySQL Plan =========== @@ -33,9 +33,9 @@ ProjectionExec: expr=[100 * CAST(sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") T ProjectionExec: 
expr=[l_extendedprice@1 * (Some(1),20,0 - l_discount@2) as __common_expr_2, p_type@0 as p_type] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, l_partkey@0)], projection=[p_type@1, l_extendedprice@3, l_discount@4] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_type] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_type] ProjectionExec: expr=[l_partkey@0 as l_partkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_shipdate@3 >= 1995-02-01 AND l_shipdate@3 < 1995-03-01 - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_partkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1995-02-01 AND l_shipdate@10 < 1995-03-01, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1995-02-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1995-03-01 END, required_guarantees=[] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_partkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1995-02-01 AND l_shipdate@10 < 1995-03-01, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1995-02-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1995-03-01 END, required_guarantees=[] diff --git a/testdata/expected-plans/q16.txt b/testdata/expected-plans/q16.txt index d08b84a..e5aeda9 100644 --- a/testdata/expected-plans/q16.txt +++ 
b/testdata/expected-plans/q16.txt @@ -28,14 +28,14 @@ SortExec: expr=[supplier_cnt@3 DESC,p_brand@0 ASC NULLS LAST,p_type@1 ASC NULLS ProjectionExec: expr=[s_suppkey@0 as s_suppkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: s_comment@1 LIKE %Customer%Complaints% - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_comment], predicate=s_comment@6 LIKE %Customer%Complaints% + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_comment], predicate=s_comment@6 LIKE %Customer%Complaints% ProjectionExec: expr=[ps_suppkey@3 as ps_suppkey, p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, ps_partkey@0)], projection=[p_brand@1, p_type@2, p_size@3, ps_suppkey@5] CoalesceBatchesExec: target_batch_size=8192 FilterExec: p_brand@1 != Brand#14 AND p_type@2 NOT LIKE SMALL PLATED% AND Use p_size@3 IN (SET) ([Literal { value: Int32(14) }, Literal { value: Int32(6) }, Literal { value: Int32(5) }, Literal { value: Int32(31) }, Literal { value: Int32(49) }, Literal { value: Int32(15) }, Literal { value: Int32(41) }, Literal { value: Int32(47) }]) - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_brand, p_type, p_size], predicate=p_brand@3 != Brand#14 AND p_type@4 NOT LIKE SMALL PLATED% AND Use p_size@5 IN (SET) ([Literal { value: Int32(14) }, Literal { value: Int32(6) }, Literal { value: Int32(5) }, Literal { value: Int32(31) }, Literal { value: Int32(49) }, Literal { value: Int32(15) }, Literal { value: Int32(41) }, Literal { value: Int32(47) }]), pruning_predicate=CASE WHEN p_brand_null_count@2 = p_brand_row_count@3 THEN false ELSE p_brand_min@0 != Brand#14 OR Brand#14 != p_brand_max@1 END AND (CASE WHEN 
p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 14 AND 14 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 6 AND 6 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 5 AND 5 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 31 AND 31 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 49 AND 49 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 15 AND 15 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 41 AND 41 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 47 AND 47 <= p_size_max@5 END), required_guarantees=[p_brand not in (Brand#14), p_size in (47, 49, 5, 14, 41, 6, 15, 31)] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet:0..41093580]]}, projection=[ps_partkey, ps_suppkey] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_brand, p_type, p_size], predicate=p_brand@3 != Brand#14 AND p_type@4 NOT LIKE SMALL PLATED% AND Use p_size@5 IN (SET) ([Literal { value: Int32(14) }, Literal { value: Int32(6) }, Literal { value: Int32(5) }, Literal { value: Int32(31) }, Literal { value: Int32(49) }, Literal { value: Int32(15) }, Literal { value: Int32(41) }, Literal { value: Int32(47) }]), pruning_predicate=CASE WHEN p_brand_null_count@2 = p_brand_row_count@3 THEN false ELSE p_brand_min@0 != Brand#14 OR Brand#14 != p_brand_max@1 END AND (CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 14 AND 14 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 
6 AND 6 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 5 AND 5 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 31 AND 31 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 49 AND 49 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 15 AND 15 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 41 AND 41 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 47 AND 47 <= p_size_max@5 END), required_guarantees=[p_brand not in (Brand#14), p_size in (6, 31, 41, 15, 49, 5, 14, 47)] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/partsupp.parquet:0..41092331]]}, projection=[ps_partkey, ps_suppkey] RaySQL Plan =========== @@ -50,12 +50,12 @@ SortExec: expr=[supplier_cnt@3 DESC,p_brand@0 ASC NULLS LAST,p_type@1 ASC NULLS ProjectionExec: expr=[s_suppkey@0 as s_suppkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: s_comment@1 LIKE %Customer%Complaints% - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_comment], predicate=s_comment@6 LIKE %Customer%Complaints% + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_comment], predicate=s_comment@6 LIKE %Customer%Complaints% ProjectionExec: expr=[ps_suppkey@3 as ps_suppkey, p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, ps_partkey@0)], projection=[p_brand@1, p_type@2, p_size@3, ps_suppkey@5] CoalesceBatchesExec: target_batch_size=8192 FilterExec: p_brand@1 != Brand#14 AND 
p_type@2 NOT LIKE SMALL PLATED% AND Use p_size@3 IN (SET) ([Literal { value: Int32(14) }, Literal { value: Int32(6) }, Literal { value: Int32(5) }, Literal { value: Int32(31) }, Literal { value: Int32(49) }, Literal { value: Int32(15) }, Literal { value: Int32(41) }, Literal { value: Int32(47) }]) - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_brand, p_type, p_size], predicate=p_brand@3 != Brand#14 AND p_type@4 NOT LIKE SMALL PLATED% AND Use p_size@5 IN (SET) ([Literal { value: Int32(14) }, Literal { value: Int32(6) }, Literal { value: Int32(5) }, Literal { value: Int32(31) }, Literal { value: Int32(49) }, Literal { value: Int32(15) }, Literal { value: Int32(41) }, Literal { value: Int32(47) }]), pruning_predicate=CASE WHEN p_brand_null_count@2 = p_brand_row_count@3 THEN false ELSE p_brand_min@0 != Brand#14 OR Brand#14 != p_brand_max@1 END AND (CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 14 AND 14 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 6 AND 6 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 5 AND 5 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 31 AND 31 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 49 AND 49 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 15 AND 15 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 41 AND 41 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 47 AND 47 <= p_size_max@5 END), required_guarantees=[p_brand not in (Brand#14), p_size in (47, 49, 5, 14, 41, 6, 15, 31)] - ParquetExec: file_groups={1 group: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet:0..41093580]]}, projection=[ps_partkey, ps_suppkey] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_brand, p_type, p_size], predicate=p_brand@3 != Brand#14 AND p_type@4 NOT LIKE SMALL PLATED% AND Use p_size@5 IN (SET) ([Literal { value: Int32(14) }, Literal { value: Int32(6) }, Literal { value: Int32(5) }, Literal { value: Int32(31) }, Literal { value: Int32(49) }, Literal { value: Int32(15) }, Literal { value: Int32(41) }, Literal { value: Int32(47) }]), pruning_predicate=CASE WHEN p_brand_null_count@2 = p_brand_row_count@3 THEN false ELSE p_brand_min@0 != Brand#14 OR Brand#14 != p_brand_max@1 END AND (CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 14 AND 14 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 6 AND 6 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 5 AND 5 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 31 AND 31 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 49 AND 49 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 15 AND 15 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 41 AND 41 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 47 AND 47 <= p_size_max@5 END), required_guarantees=[p_brand not in (Brand#14), p_size in (6, 31, 41, 15, 49, 5, 14, 47)] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/partsupp.parquet:0..41092331]]}, projection=[ps_partkey, ps_suppkey] diff --git a/testdata/expected-plans/q17.txt 
b/testdata/expected-plans/q17.txt index 327ad98..7a03af1 100644 --- a/testdata/expected-plans/q17.txt +++ b/testdata/expected-plans/q17.txt @@ -29,11 +29,11 @@ ProjectionExec: expr=[CAST(sum(lineitem.l_extendedprice)@0 AS Float64) / 7 as av ProjectionExec: expr=[p_partkey@0 as p_partkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: p_brand@1 = Brand#42 AND p_container@2 = LG BAG - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_brand, p_container], predicate=p_brand@3 = Brand#42 AND p_container@6 = LG BAG, pruning_predicate=CASE WHEN p_brand_null_count@2 = p_brand_row_count@3 THEN false ELSE p_brand_min@0 <= Brand#42 AND Brand#42 <= p_brand_max@1 END AND CASE WHEN p_container_null_count@6 = p_container_row_count@7 THEN false ELSE p_container_min@4 <= LG BAG AND LG BAG <= p_container_max@5 END, required_guarantees=[p_brand in (Brand#42), p_container in (LG BAG)] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_partkey, l_quantity, l_extendedprice] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_brand, p_container], predicate=p_brand@3 = Brand#42 AND p_container@6 = LG BAG, pruning_predicate=CASE WHEN p_brand_null_count@2 = p_brand_row_count@3 THEN false ELSE p_brand_min@0 <= Brand#42 AND Brand#42 <= p_brand_max@1 END AND CASE WHEN p_container_null_count@6 = p_container_row_count@7 THEN false ELSE p_container_min@4 <= LG BAG AND LG BAG <= p_container_max@5 END, required_guarantees=[p_brand in (Brand#42), p_container in (LG BAG)] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_partkey, l_quantity, l_extendedprice] ProjectionExec: expr=[CAST(0.2 * CAST(avg(lineitem.l_quantity)@1 AS Float64) AS Decimal128(30, 15)) as Float64(0.2) 
* avg(lineitem.l_quantity), l_partkey@0 as l_partkey] AggregateExec: mode=Single, gby=[l_partkey@0 as l_partkey], aggr=[avg(lineitem.l_quantity)] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_partkey, l_quantity] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_partkey, l_quantity] RaySQL Plan =========== @@ -49,9 +49,9 @@ ProjectionExec: expr=[CAST(sum(lineitem.l_extendedprice)@0 AS Float64) / 7 as av ProjectionExec: expr=[p_partkey@0 as p_partkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: p_brand@1 = Brand#42 AND p_container@2 = LG BAG - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_brand, p_container], predicate=p_brand@3 = Brand#42 AND p_container@6 = LG BAG, pruning_predicate=CASE WHEN p_brand_null_count@2 = p_brand_row_count@3 THEN false ELSE p_brand_min@0 <= Brand#42 AND Brand#42 <= p_brand_max@1 END AND CASE WHEN p_container_null_count@6 = p_container_row_count@7 THEN false ELSE p_container_min@4 <= LG BAG AND LG BAG <= p_container_max@5 END, required_guarantees=[p_brand in (Brand#42), p_container in (LG BAG)] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_partkey, l_quantity, l_extendedprice] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_brand, p_container], predicate=p_brand@3 = Brand#42 AND p_container@6 = LG BAG, pruning_predicate=CASE WHEN p_brand_null_count@2 = p_brand_row_count@3 THEN false ELSE p_brand_min@0 <= Brand#42 AND Brand#42 <= p_brand_max@1 END AND CASE WHEN p_container_null_count@6 = p_container_row_count@7 THEN false ELSE p_container_min@4 <= LG BAG AND LG BAG <= p_container_max@5 END, 
required_guarantees=[p_brand in (Brand#42), p_container in (LG BAG)] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_partkey, l_quantity, l_extendedprice] ProjectionExec: expr=[CAST(0.2 * CAST(avg(lineitem.l_quantity)@1 AS Float64) AS Decimal128(30, 15)) as Float64(0.2) * avg(lineitem.l_quantity), l_partkey@0 as l_partkey] AggregateExec: mode=Single, gby=[l_partkey@0 as l_partkey], aggr=[avg(lineitem.l_quantity)] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_partkey, l_quantity] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_partkey, l_quantity] diff --git a/testdata/expected-plans/q18.txt b/testdata/expected-plans/q18.txt index 0fb9045..10e413b 100644 --- a/testdata/expected-plans/q18.txt +++ b/testdata/expected-plans/q18.txt @@ -29,14 +29,14 @@ SortExec: TopK(fetch=100), expr=[o_totalprice@4 DESC,o_orderdate@3 ASC NULLS LAS CoalesceBatchesExec: target_batch_size=8192 FilterExec: sum(lineitem.l_quantity)@1 > Some(31300),21,2 AggregateExec: mode=Single, gby=[l_orderkey@0 as l_orderkey], aggr=[sum(lineitem.l_quantity)] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_orderkey, l_quantity] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_quantity] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(o_orderkey@2, l_orderkey@0)], projection=[c_custkey@0, c_name@1, o_orderkey@2, o_totalprice@3, o_orderdate@4, l_quantity@6] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, c_name@1, 
o_orderkey@2, o_totalprice@4, o_orderdate@5] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13210681]]}, projection=[c_custkey, c_name] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54533415]]}, projection=[o_orderkey, o_custkey, o_totalprice, o_orderdate] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_orderkey, l_quantity] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/customer.parquet:0..13212491]]}, projection=[c_custkey, c_name] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_custkey, o_totalprice, o_orderdate] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_quantity] RaySQL Plan =========== @@ -50,12 +50,12 @@ SortExec: TopK(fetch=100), expr=[o_totalprice@4 DESC,o_orderdate@3 ASC NULLS LAS CoalesceBatchesExec: target_batch_size=8192 FilterExec: sum(lineitem.l_quantity)@1 > Some(31300),21,2 AggregateExec: mode=Single, gby=[l_orderkey@0 as l_orderkey], aggr=[sum(lineitem.l_quantity)] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_orderkey, l_quantity] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_quantity] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(o_orderkey@2, l_orderkey@0)], projection=[c_custkey@0, c_name@1, o_orderkey@2, o_totalprice@3, o_orderdate@4, l_quantity@6] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(c_custkey@0, 
o_custkey@1)], projection=[c_custkey@0, c_name@1, o_orderkey@2, o_totalprice@4, o_orderdate@5] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13210681]]}, projection=[c_custkey, c_name] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54533415]]}, projection=[o_orderkey, o_custkey, o_totalprice, o_orderdate] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_orderkey, l_quantity] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/customer.parquet:0..13212491]]}, projection=[c_custkey, c_name] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_custkey, o_totalprice, o_orderdate] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_quantity] diff --git a/testdata/expected-plans/q19.txt b/testdata/expected-plans/q19.txt index c73edc7..a36fe83 100644 --- a/testdata/expected-plans/q19.txt +++ b/testdata/expected-plans/q19.txt @@ -20,11 +20,11 @@ ProjectionExec: expr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_disco HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, l_partkey@0)], filter=p_brand@1 = Brand#21 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("SM CASE") }, Literal { value: Utf8("SM BOX") }, Literal { value: Utf8("SM PACK") }, Literal { value: Utf8("SM PKG") }]) AND l_quantity@0 >= Some(800),11,2 AND l_quantity@0 <= Some(1800),11,2 AND p_size@2 <= 5 OR p_brand@1 = Brand#13 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("MED BAG") }, Literal { value: Utf8("MED BOX") }, Literal { value: Utf8("MED PKG") }, Literal { value: Utf8("MED PACK") }]) AND l_quantity@0 >= Some(2000),11,2 AND l_quantity@0 <= 
Some(3000),11,2 AND p_size@2 <= 10 OR p_brand@1 = Brand#52 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("LG CASE") }, Literal { value: Utf8("LG BOX") }, Literal { value: Utf8("LG PACK") }, Literal { value: Utf8("LG PKG") }]) AND l_quantity@0 >= Some(3000),11,2 AND l_quantity@0 <= Some(4000),11,2 AND p_size@2 <= 15, projection=[l_extendedprice@6, l_discount@7] CoalesceBatchesExec: target_batch_size=8192 FilterExec: (p_brand@1 = Brand#21 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("SM CASE") }, Literal { value: Utf8("SM BOX") }, Literal { value: Utf8("SM PACK") }, Literal { value: Utf8("SM PKG") }]) AND p_size@2 <= 5 OR p_brand@1 = Brand#13 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("MED BAG") }, Literal { value: Utf8("MED BOX") }, Literal { value: Utf8("MED PKG") }, Literal { value: Utf8("MED PACK") }]) AND p_size@2 <= 10 OR p_brand@1 = Brand#52 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("LG CASE") }, Literal { value: Utf8("LG BOX") }, Literal { value: Utf8("LG PACK") }, Literal { value: Utf8("LG PKG") }]) AND p_size@2 <= 15) AND p_size@2 >= 1 - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_brand, p_size, p_container], predicate=p_size@5 >= 1 AND (p_brand@3 = Brand#21 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("SM CASE") }, Literal { value: Utf8("SM BOX") }, Literal { value: Utf8("SM PACK") }, Literal { value: Utf8("SM PKG") }]) AND p_size@5 <= 5 OR p_brand@3 = Brand#13 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("MED BAG") }, Literal { value: Utf8("MED BOX") }, Literal { value: Utf8("MED PKG") }, Literal { value: Utf8("MED PACK") }]) AND p_size@5 <= 10 OR p_brand@3 = Brand#52 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("LG CASE") }, Literal { value: Utf8("LG BOX") }, Literal { value: Utf8("LG PACK") }, Literal { value: Utf8("LG PKG") }]) AND p_size@5 <= 15), pruning_predicate=CASE WHEN 
p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_max@0 >= 1 END AND (CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#21 AND Brand#21 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM CASE AND SM CASE <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM BOX AND SM BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM PACK AND SM PACK <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM PKG AND SM PKG <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 5 END OR CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#13 AND Brand#13 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED BAG AND MED BAG <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED BOX AND MED BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED PKG AND MED PKG <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED PACK AND MED PACK <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 10 END OR CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#52 AND Brand#52 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG CASE AND LG CASE <= p_container_max@8 END OR 
CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG BOX AND LG BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG PACK AND LG PACK <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG PKG AND LG PKG <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 15 END), required_guarantees=[] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_brand, p_size, p_container], predicate=p_size@5 >= 1 AND (p_brand@3 = Brand#21 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("SM CASE") }, Literal { value: Utf8("SM BOX") }, Literal { value: Utf8("SM PACK") }, Literal { value: Utf8("SM PKG") }]) AND p_size@5 <= 5 OR p_brand@3 = Brand#13 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("MED BAG") }, Literal { value: Utf8("MED BOX") }, Literal { value: Utf8("MED PKG") }, Literal { value: Utf8("MED PACK") }]) AND p_size@5 <= 10 OR p_brand@3 = Brand#52 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("LG CASE") }, Literal { value: Utf8("LG BOX") }, Literal { value: Utf8("LG PACK") }, Literal { value: Utf8("LG PKG") }]) AND p_size@5 <= 15), pruning_predicate=CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_max@0 >= 1 END AND (CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#21 AND Brand#21 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM CASE AND SM CASE <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM BOX AND SM BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 
THEN false ELSE p_container_min@7 <= SM PACK AND SM PACK <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM PKG AND SM PKG <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 5 END OR CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#13 AND Brand#13 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED BAG AND MED BAG <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED BOX AND MED BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED PKG AND MED PKG <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED PACK AND MED PACK <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 10 END OR CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#52 AND Brand#52 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG CASE AND LG CASE <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG BOX AND LG BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG PACK AND LG PACK <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG PKG AND LG PKG <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 15 END), required_guarantees=[] ProjectionExec: 
expr=[l_partkey@0 as l_partkey, l_quantity@1 as l_quantity, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount] CoalesceBatchesExec: target_batch_size=8192 FilterExec: (l_quantity@1 >= Some(800),11,2 AND l_quantity@1 <= Some(1800),11,2 OR l_quantity@1 >= Some(2000),11,2 AND l_quantity@1 <= Some(3000),11,2 OR l_quantity@1 >= Some(3000),11,2 AND l_quantity@1 <= Some(4000),11,2) AND (l_shipmode@5 = AIR OR l_shipmode@5 = AIR REG) AND l_shipinstruct@4 = DELIVER IN PERSON - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_partkey, l_quantity, l_extendedprice, l_discount, l_shipinstruct, l_shipmode], predicate=(l_shipmode@14 = AIR OR l_shipmode@14 = AIR REG) AND l_shipinstruct@13 = DELIVER IN PERSON AND (l_quantity@4 >= Some(800),11,2 AND l_quantity@4 <= Some(1800),11,2 OR l_quantity@4 >= Some(2000),11,2 AND l_quantity@4 <= Some(3000),11,2 OR l_quantity@4 >= Some(3000),11,2 AND l_quantity@4 <= Some(4000),11,2), pruning_predicate=(CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= AIR AND AIR <= l_shipmode_max@1 END OR CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= AIR REG AND AIR REG <= l_shipmode_max@1 END) AND CASE WHEN l_shipinstruct_null_count@6 = l_shipinstruct_row_count@7 THEN false ELSE l_shipinstruct_min@4 <= DELIVER IN PERSON AND DELIVER IN PERSON <= l_shipinstruct_max@5 END AND (CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(800),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(1800),11,2 END OR CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(2000),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(3000),11,2 END OR CASE WHEN 
l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(3000),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(4000),11,2 END), required_guarantees=[l_shipmode in (AIR REG, AIR), l_shipinstruct in (DELIVER IN PERSON)] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_partkey, l_quantity, l_extendedprice, l_discount, l_shipinstruct, l_shipmode], predicate=(l_shipmode@14 = AIR OR l_shipmode@14 = AIR REG) AND l_shipinstruct@13 = DELIVER IN PERSON AND (l_quantity@4 >= Some(800),11,2 AND l_quantity@4 <= Some(1800),11,2 OR l_quantity@4 >= Some(2000),11,2 AND l_quantity@4 <= Some(3000),11,2 OR l_quantity@4 >= Some(3000),11,2 AND l_quantity@4 <= Some(4000),11,2), pruning_predicate=(CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= AIR AND AIR <= l_shipmode_max@1 END OR CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= AIR REG AND AIR REG <= l_shipmode_max@1 END) AND CASE WHEN l_shipinstruct_null_count@6 = l_shipinstruct_row_count@7 THEN false ELSE l_shipinstruct_min@4 <= DELIVER IN PERSON AND DELIVER IN PERSON <= l_shipinstruct_max@5 END AND (CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(800),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(1800),11,2 END OR CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(2000),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(3000),11,2 END OR CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(3000),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE 
l_quantity_min@11 <= Some(4000),11,2 END), required_guarantees=[l_shipmode in (AIR REG, AIR), l_shipinstruct in (DELIVER IN PERSON)] RaySQL Plan =========== @@ -36,9 +36,9 @@ ProjectionExec: expr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_disco HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, l_partkey@0)], filter=p_brand@1 = Brand#21 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("SM CASE") }, Literal { value: Utf8("SM BOX") }, Literal { value: Utf8("SM PACK") }, Literal { value: Utf8("SM PKG") }]) AND l_quantity@0 >= Some(800),11,2 AND l_quantity@0 <= Some(1800),11,2 AND p_size@2 <= 5 OR p_brand@1 = Brand#13 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("MED BAG") }, Literal { value: Utf8("MED BOX") }, Literal { value: Utf8("MED PKG") }, Literal { value: Utf8("MED PACK") }]) AND l_quantity@0 >= Some(2000),11,2 AND l_quantity@0 <= Some(3000),11,2 AND p_size@2 <= 10 OR p_brand@1 = Brand#52 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("LG CASE") }, Literal { value: Utf8("LG BOX") }, Literal { value: Utf8("LG PACK") }, Literal { value: Utf8("LG PKG") }]) AND l_quantity@0 >= Some(3000),11,2 AND l_quantity@0 <= Some(4000),11,2 AND p_size@2 <= 15, projection=[l_extendedprice@6, l_discount@7] CoalesceBatchesExec: target_batch_size=8192 FilterExec: (p_brand@1 = Brand#21 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("SM CASE") }, Literal { value: Utf8("SM BOX") }, Literal { value: Utf8("SM PACK") }, Literal { value: Utf8("SM PKG") }]) AND p_size@2 <= 5 OR p_brand@1 = Brand#13 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("MED BAG") }, Literal { value: Utf8("MED BOX") }, Literal { value: Utf8("MED PKG") }, Literal { value: Utf8("MED PACK") }]) AND p_size@2 <= 10 OR p_brand@1 = Brand#52 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("LG CASE") }, Literal { value: Utf8("LG BOX") }, Literal { value: Utf8("LG PACK") }, Literal { value: Utf8("LG PKG") }]) AND p_size@2 <= 15) AND p_size@2 >= 1 - 
ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_brand, p_size, p_container], predicate=p_size@5 >= 1 AND (p_brand@3 = Brand#21 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("SM CASE") }, Literal { value: Utf8("SM BOX") }, Literal { value: Utf8("SM PACK") }, Literal { value: Utf8("SM PKG") }]) AND p_size@5 <= 5 OR p_brand@3 = Brand#13 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("MED BAG") }, Literal { value: Utf8("MED BOX") }, Literal { value: Utf8("MED PKG") }, Literal { value: Utf8("MED PACK") }]) AND p_size@5 <= 10 OR p_brand@3 = Brand#52 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("LG CASE") }, Literal { value: Utf8("LG BOX") }, Literal { value: Utf8("LG PACK") }, Literal { value: Utf8("LG PKG") }]) AND p_size@5 <= 15), pruning_predicate=CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_max@0 >= 1 END AND (CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#21 AND Brand#21 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM CASE AND SM CASE <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM BOX AND SM BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM PACK AND SM PACK <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM PKG AND SM PKG <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 5 END OR CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#13 AND Brand#13 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE 
p_container_min@7 <= MED BAG AND MED BAG <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED BOX AND MED BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED PKG AND MED PKG <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED PACK AND MED PACK <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 10 END OR CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#52 AND Brand#52 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG CASE AND LG CASE <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG BOX AND LG BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG PACK AND LG PACK <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG PKG AND LG PKG <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 15 END), required_guarantees=[] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_brand, p_size, p_container], predicate=p_size@5 >= 1 AND (p_brand@3 = Brand#21 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("SM CASE") }, Literal { value: Utf8("SM BOX") }, Literal { value: Utf8("SM PACK") }, Literal { value: Utf8("SM PKG") }]) AND p_size@5 <= 5 OR p_brand@3 = Brand#13 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("MED BAG") }, Literal { value: Utf8("MED BOX") }, Literal { value: Utf8("MED 
PKG") }, Literal { value: Utf8("MED PACK") }]) AND p_size@5 <= 10 OR p_brand@3 = Brand#52 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("LG CASE") }, Literal { value: Utf8("LG BOX") }, Literal { value: Utf8("LG PACK") }, Literal { value: Utf8("LG PKG") }]) AND p_size@5 <= 15), pruning_predicate=CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_max@0 >= 1 END AND (CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#21 AND Brand#21 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM CASE AND SM CASE <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM BOX AND SM BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM PACK AND SM PACK <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM PKG AND SM PKG <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 5 END OR CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#13 AND Brand#13 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED BAG AND MED BAG <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED BOX AND MED BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED PKG AND MED PKG <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED PACK AND MED PACK <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false 
ELSE p_size_min@11 <= 10 END OR CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#52 AND Brand#52 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG CASE AND LG CASE <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG BOX AND LG BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG PACK AND LG PACK <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG PKG AND LG PKG <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 15 END), required_guarantees=[] ProjectionExec: expr=[l_partkey@0 as l_partkey, l_quantity@1 as l_quantity, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount] CoalesceBatchesExec: target_batch_size=8192 FilterExec: (l_quantity@1 >= Some(800),11,2 AND l_quantity@1 <= Some(1800),11,2 OR l_quantity@1 >= Some(2000),11,2 AND l_quantity@1 <= Some(3000),11,2 OR l_quantity@1 >= Some(3000),11,2 AND l_quantity@1 <= Some(4000),11,2) AND (l_shipmode@5 = AIR OR l_shipmode@5 = AIR REG) AND l_shipinstruct@4 = DELIVER IN PERSON - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_partkey, l_quantity, l_extendedprice, l_discount, l_shipinstruct, l_shipmode], predicate=(l_shipmode@14 = AIR OR l_shipmode@14 = AIR REG) AND l_shipinstruct@13 = DELIVER IN PERSON AND (l_quantity@4 >= Some(800),11,2 AND l_quantity@4 <= Some(1800),11,2 OR l_quantity@4 >= Some(2000),11,2 AND l_quantity@4 <= Some(3000),11,2 OR l_quantity@4 >= Some(3000),11,2 AND l_quantity@4 <= Some(4000),11,2), pruning_predicate=(CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE 
l_shipmode_min@0 <= AIR AND AIR <= l_shipmode_max@1 END OR CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= AIR REG AND AIR REG <= l_shipmode_max@1 END) AND CASE WHEN l_shipinstruct_null_count@6 = l_shipinstruct_row_count@7 THEN false ELSE l_shipinstruct_min@4 <= DELIVER IN PERSON AND DELIVER IN PERSON <= l_shipinstruct_max@5 END AND (CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(800),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(1800),11,2 END OR CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(2000),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(3000),11,2 END OR CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(3000),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(4000),11,2 END), required_guarantees=[l_shipmode in (AIR REG, AIR), l_shipinstruct in (DELIVER IN PERSON)] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_partkey, l_quantity, l_extendedprice, l_discount, l_shipinstruct, l_shipmode], predicate=(l_shipmode@14 = AIR OR l_shipmode@14 = AIR REG) AND l_shipinstruct@13 = DELIVER IN PERSON AND (l_quantity@4 >= Some(800),11,2 AND l_quantity@4 <= Some(1800),11,2 OR l_quantity@4 >= Some(2000),11,2 AND l_quantity@4 <= Some(3000),11,2 OR l_quantity@4 >= Some(3000),11,2 AND l_quantity@4 <= Some(4000),11,2), pruning_predicate=(CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= AIR AND AIR <= l_shipmode_max@1 END OR CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= AIR REG AND AIR REG <= 
l_shipmode_max@1 END) AND CASE WHEN l_shipinstruct_null_count@6 = l_shipinstruct_row_count@7 THEN false ELSE l_shipinstruct_min@4 <= DELIVER IN PERSON AND DELIVER IN PERSON <= l_shipinstruct_max@5 END AND (CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(800),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(1800),11,2 END OR CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(2000),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(3000),11,2 END OR CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(3000),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(4000),11,2 END), required_guarantees=[l_shipmode in (AIR REG, AIR), l_shipinstruct in (DELIVER IN PERSON)] diff --git a/testdata/expected-plans/q2.txt b/testdata/expected-plans/q2.txt index f22938c..94a1eb2 100644 --- a/testdata/expected-plans/q2.txt +++ b/testdata/expected-plans/q2.txt @@ -50,22 +50,22 @@ SortExec: TopK(fetch=100), expr=[s_acctbal@0 DESC,n_name@2 ASC NULLS LAST,s_name ProjectionExec: expr=[r_regionkey@0 as r_regionkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: r_name@1 = ASIA - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = ASIA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= ASIA AND ASIA <= r_name_max@1 END, required_guarantees=[r_name in (ASIA)] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/region.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = ASIA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 
THEN false ELSE r_name_min@0 <= ASIA AND ASIA <= r_name_max@1 END, required_guarantees=[r_name in (ASIA)] ProjectionExec: expr=[p_partkey@2 as p_partkey, p_mfgr@3 as p_mfgr, s_name@4 as s_name, s_address@5 as s_address, s_phone@6 as s_phone, s_acctbal@7 as s_acctbal, s_comment@8 as s_comment, ps_supplycost@9 as ps_supplycost, n_name@0 as n_name, n_regionkey@1 as n_regionkey] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@4)], projection=[n_name@1, n_regionkey@2, p_partkey@3, p_mfgr@4, s_name@5, s_address@6, s_phone@8, s_acctbal@9, s_comment@10, ps_supplycost@11] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name, n_regionkey] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name, n_regionkey] ProjectionExec: expr=[p_partkey@6 as p_partkey, p_mfgr@7 as p_mfgr, s_name@0 as s_name, s_address@1 as s_address, s_nationkey@2 as s_nationkey, s_phone@3 as s_phone, s_acctbal@4 as s_acctbal, s_comment@5 as s_comment, ps_supplycost@8 as ps_supplycost] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@2)], projection=[s_name@1, s_address@2, s_nationkey@3, s_phone@4, s_acctbal@5, s_comment@6, p_partkey@7, p_mfgr@8, ps_supplycost@10] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, ps_partkey@0)], projection=[p_partkey@0, 
p_mfgr@1, ps_suppkey@3, ps_supplycost@4] ProjectionExec: expr=[p_partkey@0 as p_partkey, p_mfgr@1 as p_mfgr] CoalesceBatchesExec: target_batch_size=8192 FilterExec: p_size@3 = 48 AND p_type@2 LIKE %TIN - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_mfgr, p_type, p_size], predicate=p_size@5 = 48 AND p_type@4 LIKE %TIN, pruning_predicate=CASE WHEN p_size_null_count@2 = p_size_row_count@3 THEN false ELSE p_size_min@0 <= 48 AND 48 <= p_size_max@1 END, required_guarantees=[p_size in (48)] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet:0..41093580]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_mfgr, p_type, p_size], predicate=p_size@5 = 48 AND p_type@4 LIKE %TIN, pruning_predicate=CASE WHEN p_size_null_count@2 = p_size_row_count@3 THEN false ELSE p_size_min@0 <= 48 AND 48 <= p_size_max@1 END, required_guarantees=[p_size in (48)] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/partsupp.parquet:0..41092331]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] ProjectionExec: expr=[min(partsupp.ps_supplycost)@1 as min(partsupp.ps_supplycost), ps_partkey@0 as ps_partkey] AggregateExec: mode=Single, gby=[ps_partkey@0 as ps_partkey], aggr=[min(partsupp.ps_supplycost)] CoalesceBatchesExec: target_batch_size=8192 @@ -73,16 +73,16 @@ SortExec: TopK(fetch=100), expr=[s_acctbal@0 DESC,n_name@2 ASC NULLS LAST,s_name ProjectionExec: expr=[r_regionkey@0 as r_regionkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: r_name@1 = ASIA - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = ASIA, pruning_predicate=CASE WHEN r_name_null_count@2 = 
r_name_row_count@3 THEN false ELSE r_name_min@0 <= ASIA AND ASIA <= r_name_max@1 END, required_guarantees=[r_name in (ASIA)] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/region.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = ASIA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= ASIA AND ASIA <= r_name_max@1 END, required_guarantees=[r_name in (ASIA)] ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_supplycost@2 as ps_supplycost, n_regionkey@0 as n_regionkey] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[n_regionkey@1, ps_partkey@2, ps_supplycost@3] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_regionkey] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_regionkey] ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_supplycost@2 as ps_supplycost, s_nationkey@0 as s_nationkey] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@1)], projection=[s_nationkey@1, ps_partkey@2, ps_supplycost@4] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet:0..41093580]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/partsupp.parquet:0..41092331]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] RaySQL Plan 
=========== @@ -97,22 +97,22 @@ SortExec: TopK(fetch=100), expr=[s_acctbal@0 DESC,n_name@2 ASC NULLS LAST,s_name ProjectionExec: expr=[r_regionkey@0 as r_regionkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: r_name@1 = ASIA - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = ASIA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= ASIA AND ASIA <= r_name_max@1 END, required_guarantees=[r_name in (ASIA)] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/region.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = ASIA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= ASIA AND ASIA <= r_name_max@1 END, required_guarantees=[r_name in (ASIA)] ProjectionExec: expr=[p_partkey@2 as p_partkey, p_mfgr@3 as p_mfgr, s_name@4 as s_name, s_address@5 as s_address, s_phone@6 as s_phone, s_acctbal@7 as s_acctbal, s_comment@8 as s_comment, ps_supplycost@9 as ps_supplycost, n_name@0 as n_name, n_regionkey@1 as n_regionkey] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@4)], projection=[n_name@1, n_regionkey@2, p_partkey@3, p_mfgr@4, s_name@5, s_address@6, s_phone@8, s_acctbal@9, s_comment@10, ps_supplycost@11] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name, n_regionkey] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name, n_regionkey] ProjectionExec: expr=[p_partkey@6 as p_partkey, p_mfgr@7 as p_mfgr, s_name@0 as s_name, s_address@1 as s_address, s_nationkey@2 as s_nationkey, s_phone@3 as s_phone, s_acctbal@4 as s_acctbal, s_comment@5 as s_comment, 
ps_supplycost@8 as ps_supplycost] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@2)], projection=[s_name@1, s_address@2, s_nationkey@3, s_phone@4, s_acctbal@5, s_comment@6, p_partkey@7, p_mfgr@8, ps_supplycost@10] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, ps_partkey@0)], projection=[p_partkey@0, p_mfgr@1, ps_suppkey@3, ps_supplycost@4] ProjectionExec: expr=[p_partkey@0 as p_partkey, p_mfgr@1 as p_mfgr] CoalesceBatchesExec: target_batch_size=8192 FilterExec: p_size@3 = 48 AND p_type@2 LIKE %TIN - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_mfgr, p_type, p_size], predicate=p_size@5 = 48 AND p_type@4 LIKE %TIN, pruning_predicate=CASE WHEN p_size_null_count@2 = p_size_row_count@3 THEN false ELSE p_size_min@0 <= 48 AND 48 <= p_size_max@1 END, required_guarantees=[p_size in (48)] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet:0..41093580]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_mfgr, p_type, p_size], predicate=p_size@5 = 48 AND p_type@4 LIKE %TIN, pruning_predicate=CASE WHEN p_size_null_count@2 = p_size_row_count@3 THEN false ELSE p_size_min@0 <= 48 AND 48 <= p_size_max@1 END, required_guarantees=[p_size in (48)] + ParquetExec: file_groups={1 group: 
[[home/edmondo/Development/datafusion-ray/data/partsupp.parquet:0..41092331]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] ProjectionExec: expr=[min(partsupp.ps_supplycost)@1 as min(partsupp.ps_supplycost), ps_partkey@0 as ps_partkey] AggregateExec: mode=Single, gby=[ps_partkey@0 as ps_partkey], aggr=[min(partsupp.ps_supplycost)] CoalesceBatchesExec: target_batch_size=8192 @@ -120,14 +120,14 @@ SortExec: TopK(fetch=100), expr=[s_acctbal@0 DESC,n_name@2 ASC NULLS LAST,s_name ProjectionExec: expr=[r_regionkey@0 as r_regionkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: r_name@1 = ASIA - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = ASIA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= ASIA AND ASIA <= r_name_max@1 END, required_guarantees=[r_name in (ASIA)] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/region.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = ASIA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= ASIA AND ASIA <= r_name_max@1 END, required_guarantees=[r_name in (ASIA)] ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_supplycost@2 as ps_supplycost, n_regionkey@0 as n_regionkey] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[n_regionkey@1, ps_partkey@2, ps_supplycost@3] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_regionkey] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_regionkey] ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_supplycost@2 as ps_supplycost, s_nationkey@0 as 
s_nationkey] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@1)], projection=[s_nationkey@1, ps_partkey@2, ps_supplycost@4] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet:0..41093580]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/partsupp.parquet:0..41092331]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] diff --git a/testdata/expected-plans/q20.txt b/testdata/expected-plans/q20.txt index 832ab30..49c0843 100644 --- a/testdata/expected-plans/q20.txt +++ b/testdata/expected-plans/q20.txt @@ -37,8 +37,8 @@ SortExec: expr=[s_name@0 ASC NULLS LAST], preserve_partitioning=[false] ProjectionExec: expr=[n_nationkey@0 as n_nationkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: n_name@1 = KENYA - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = KENYA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= KENYA AND KENYA <= n_name_max@1 END, required_guarantees=[n_name in (KENYA)] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_name, s_address, s_nationkey] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = KENYA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE 
n_name_min@0 <= KENYA AND KENYA <= n_name_max@1 END, required_guarantees=[n_name in (KENYA)] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_name, s_address, s_nationkey] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(ps_partkey@0, l_partkey@1), (ps_suppkey@1, l_suppkey@2)], filter=CAST(ps_availqty@0 AS Float64) > Float64(0.5) * sum(lineitem.l_quantity)@1, projection=[ps_suppkey@1] CoalesceBatchesExec: target_batch_size=8192 @@ -46,14 +46,14 @@ SortExec: expr=[s_name@0 ASC NULLS LAST], preserve_partitioning=[false] ProjectionExec: expr=[p_partkey@0 as p_partkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: p_name@1 LIKE blanched% - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_name], predicate=p_name@1 LIKE blanched% - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet:0..41093580]]}, projection=[ps_partkey, ps_suppkey, ps_availqty] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_name], predicate=p_name@1 LIKE blanched% + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/partsupp.parquet:0..41092331]]}, projection=[ps_partkey, ps_suppkey, ps_availqty] ProjectionExec: expr=[0.5 * CAST(sum(lineitem.l_quantity)@2 AS Float64) as Float64(0.5) * sum(lineitem.l_quantity), l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey] AggregateExec: mode=Single, gby=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey], aggr=[sum(lineitem.l_quantity)] ProjectionExec: expr=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey, l_quantity@2 as l_quantity] CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_shipdate@3 >= 1993-01-01 AND l_shipdate@3 < 1994-01-01 - ParquetExec: file_groups={1 group: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_partkey, l_suppkey, l_quantity, l_shipdate], predicate=l_shipdate@10 >= 1993-01-01 AND l_shipdate@10 < 1994-01-01, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1993-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1994-01-01 END, required_guarantees=[] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_partkey, l_suppkey, l_quantity, l_shipdate], predicate=l_shipdate@10 >= 1993-01-01 AND l_shipdate@10 < 1994-01-01, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1993-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1994-01-01 END, required_guarantees=[] RaySQL Plan =========== @@ -67,8 +67,8 @@ SortExec: expr=[s_name@0 ASC NULLS LAST], preserve_partitioning=[false] ProjectionExec: expr=[n_nationkey@0 as n_nationkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: n_name@1 = KENYA - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = KENYA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= KENYA AND KENYA <= n_name_max@1 END, required_guarantees=[n_name in (KENYA)] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_name, s_address, s_nationkey] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = KENYA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE 
n_name_min@0 <= KENYA AND KENYA <= n_name_max@1 END, required_guarantees=[n_name in (KENYA)] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_name, s_address, s_nationkey] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(ps_partkey@0, l_partkey@1), (ps_suppkey@1, l_suppkey@2)], filter=CAST(ps_availqty@0 AS Float64) > Float64(0.5) * sum(lineitem.l_quantity)@1, projection=[ps_suppkey@1] CoalesceBatchesExec: target_batch_size=8192 @@ -76,12 +76,12 @@ SortExec: expr=[s_name@0 ASC NULLS LAST], preserve_partitioning=[false] ProjectionExec: expr=[p_partkey@0 as p_partkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: p_name@1 LIKE blanched% - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_name], predicate=p_name@1 LIKE blanched% - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet:0..41093580]]}, projection=[ps_partkey, ps_suppkey, ps_availqty] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_name], predicate=p_name@1 LIKE blanched% + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/partsupp.parquet:0..41092331]]}, projection=[ps_partkey, ps_suppkey, ps_availqty] ProjectionExec: expr=[0.5 * CAST(sum(lineitem.l_quantity)@2 AS Float64) as Float64(0.5) * sum(lineitem.l_quantity), l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey] AggregateExec: mode=Single, gby=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey], aggr=[sum(lineitem.l_quantity)] ProjectionExec: expr=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey, l_quantity@2 as l_quantity] CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_shipdate@3 >= 1993-01-01 AND l_shipdate@3 < 1994-01-01 - ParquetExec: file_groups={1 group: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_partkey, l_suppkey, l_quantity, l_shipdate], predicate=l_shipdate@10 >= 1993-01-01 AND l_shipdate@10 < 1994-01-01, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1993-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1994-01-01 END, required_guarantees=[] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_partkey, l_suppkey, l_quantity, l_shipdate], predicate=l_shipdate@10 >= 1993-01-01 AND l_shipdate@10 < 1994-01-01, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1993-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1994-01-01 END, required_guarantees=[] diff --git a/testdata/expected-plans/q21.txt b/testdata/expected-plans/q21.txt index 7c4f7a4..3426574 100644 --- a/testdata/expected-plans/q21.txt +++ b/testdata/expected-plans/q21.txt @@ -49,25 +49,25 @@ SortExec: TopK(fetch=100), expr=[numwait@1 DESC,s_name@0 ASC NULLS LAST], preser ProjectionExec: expr=[n_nationkey@0 as n_nationkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: n_name@1 = ARGENTINA - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ARGENTINA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ARGENTINA AND ARGENTINA <= n_name_max@1 END, required_guarantees=[n_name in (ARGENTINA)] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ARGENTINA, pruning_predicate=CASE WHEN n_name_null_count@2 = 
n_name_row_count@3 THEN false ELSE n_name_min@0 <= ARGENTINA AND ARGENTINA <= n_name_max@1 END, required_guarantees=[n_name in (ARGENTINA)] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(o_orderkey@0, l_orderkey@2)], projection=[s_name@1, s_nationkey@2, l_orderkey@3, l_suppkey@4] ProjectionExec: expr=[o_orderkey@0 as o_orderkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: o_orderstatus@1 = F - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54533415]]}, projection=[o_orderkey, o_orderstatus], predicate=o_orderstatus@2 = F, pruning_predicate=CASE WHEN o_orderstatus_null_count@2 = o_orderstatus_row_count@3 THEN false ELSE o_orderstatus_min@0 <= F AND F <= o_orderstatus_max@1 END, required_guarantees=[o_orderstatus in (F)] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_orderstatus], predicate=o_orderstatus@2 = F, pruning_predicate=CASE WHEN o_orderstatus_null_count@2 = o_orderstatus_row_count@3 THEN false ELSE o_orderstatus_min@0 <= F AND F <= o_orderstatus_max@1 END, required_guarantees=[o_orderstatus in (F)] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1)], projection=[s_name@1, s_nationkey@2, l_orderkey@3, l_suppkey@4] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_name, s_nationkey] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_name, s_nationkey] ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_suppkey@1 as l_suppkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_receiptdate@3 > l_commitdate@2 - ParquetExec: file_groups={1 group: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_orderkey, l_suppkey] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_suppkey] ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_suppkey@1 as l_suppkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_receiptdate@3 > l_commitdate@2 - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 RaySQL Plan =========== @@ -85,23 +85,23 @@ SortExec: TopK(fetch=100), expr=[numwait@1 DESC,s_name@0 ASC NULLS LAST], preser ProjectionExec: expr=[n_nationkey@0 as n_nationkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: n_name@1 = ARGENTINA - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ARGENTINA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ARGENTINA AND ARGENTINA <= n_name_max@1 END, required_guarantees=[n_name in (ARGENTINA)] + 
ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ARGENTINA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ARGENTINA AND ARGENTINA <= n_name_max@1 END, required_guarantees=[n_name in (ARGENTINA)] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(o_orderkey@0, l_orderkey@2)], projection=[s_name@1, s_nationkey@2, l_orderkey@3, l_suppkey@4] ProjectionExec: expr=[o_orderkey@0 as o_orderkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: o_orderstatus@1 = F - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54533415]]}, projection=[o_orderkey, o_orderstatus], predicate=o_orderstatus@2 = F, pruning_predicate=CASE WHEN o_orderstatus_null_count@2 = o_orderstatus_row_count@3 THEN false ELSE o_orderstatus_min@0 <= F AND F <= o_orderstatus_max@1 END, required_guarantees=[o_orderstatus in (F)] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_orderstatus], predicate=o_orderstatus@2 = F, pruning_predicate=CASE WHEN o_orderstatus_null_count@2 = o_orderstatus_row_count@3 THEN false ELSE o_orderstatus_min@0 <= F AND F <= o_orderstatus_max@1 END, required_guarantees=[o_orderstatus in (F)] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1)], projection=[s_name@1, s_nationkey@2, l_orderkey@3, l_suppkey@4] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_name, s_nationkey] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_name, s_nationkey] ProjectionExec: expr=[l_orderkey@0 as 
l_orderkey, l_suppkey@1 as l_suppkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_receiptdate@3 > l_commitdate@2 - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_orderkey, l_suppkey] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_suppkey] ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_suppkey@1 as l_suppkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_receiptdate@3 > l_commitdate@2 - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 diff --git a/testdata/expected-plans/q22.txt b/testdata/expected-plans/q22.txt index 21a5e01..80433ec 100644 --- a/testdata/expected-plans/q22.txt +++ b/testdata/expected-plans/q22.txt @@ -31,13 +31,13 @@ SortExec: expr=[cntrycode@0 ASC NULLS LAST], preserve_partitioning=[false] ProjectionExec: expr=[c_acctbal@1 as c_acctbal] CoalesceBatchesExec: target_batch_size=8192 FilterExec: c_acctbal@1 > Some(0),11,2 AND Use substr(c_phone@0, 1, 2) IN (SET) ([Literal { 
value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13210681]]}, projection=[c_phone, c_acctbal], predicate=c_acctbal@5 > Some(0),11,2 AND Use substr(c_phone@4, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) AND c_acctbal@5 > Some(0),11,2, pruning_predicate=CASE WHEN c_acctbal_null_count@1 = c_acctbal_row_count@2 THEN false ELSE c_acctbal_max@0 > Some(0),11,2 END AND CASE WHEN c_acctbal_null_count@1 = c_acctbal_row_count@2 THEN false ELSE c_acctbal_max@0 > Some(0),11,2 END, required_guarantees=[] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/customer.parquet:0..13212491]]}, projection=[c_phone, c_acctbal], predicate=c_acctbal@5 > Some(0),11,2 AND Use substr(c_phone@4, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) AND c_acctbal@5 > Some(0),11,2, pruning_predicate=CASE WHEN c_acctbal_null_count@1 = c_acctbal_row_count@2 THEN false ELSE c_acctbal_max@0 > Some(0),11,2 END AND CASE WHEN c_acctbal_null_count@1 = c_acctbal_row_count@2 THEN false ELSE c_acctbal_max@0 > Some(0),11,2 END, required_guarantees=[] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=LeftAnti, on=[(c_custkey@0, o_custkey@0)], projection=[c_phone@1, c_acctbal@2] CoalesceBatchesExec: target_batch_size=8192 FilterExec: Use substr(c_phone@1, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { 
value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13210681]]}, projection=[c_custkey, c_phone, c_acctbal], predicate=Use substr(c_phone@4, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54533415]]}, projection=[o_custkey] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/customer.parquet:0..13212491]]}, projection=[c_custkey, c_phone, c_acctbal], predicate=Use substr(c_phone@4, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_custkey] RaySQL Plan =========== @@ -52,11 +52,11 @@ SortExec: expr=[cntrycode@0 ASC NULLS LAST], preserve_partitioning=[false] ProjectionExec: expr=[c_acctbal@1 as c_acctbal] CoalesceBatchesExec: target_batch_size=8192 FilterExec: c_acctbal@1 > Some(0),11,2 AND Use substr(c_phone@0, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13210681]]}, projection=[c_phone, c_acctbal], 
predicate=c_acctbal@5 > Some(0),11,2 AND Use substr(c_phone@4, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) AND c_acctbal@5 > Some(0),11,2, pruning_predicate=CASE WHEN c_acctbal_null_count@1 = c_acctbal_row_count@2 THEN false ELSE c_acctbal_max@0 > Some(0),11,2 END AND CASE WHEN c_acctbal_null_count@1 = c_acctbal_row_count@2 THEN false ELSE c_acctbal_max@0 > Some(0),11,2 END, required_guarantees=[] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/customer.parquet:0..13212491]]}, projection=[c_phone, c_acctbal], predicate=c_acctbal@5 > Some(0),11,2 AND Use substr(c_phone@4, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) AND c_acctbal@5 > Some(0),11,2, pruning_predicate=CASE WHEN c_acctbal_null_count@1 = c_acctbal_row_count@2 THEN false ELSE c_acctbal_max@0 > Some(0),11,2 END AND CASE WHEN c_acctbal_null_count@1 = c_acctbal_row_count@2 THEN false ELSE c_acctbal_max@0 > Some(0),11,2 END, required_guarantees=[] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=LeftAnti, on=[(c_custkey@0, o_custkey@0)], projection=[c_phone@1, c_acctbal@2] CoalesceBatchesExec: target_batch_size=8192 FilterExec: Use substr(c_phone@1, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13210681]]}, projection=[c_custkey, c_phone, c_acctbal], predicate=Use 
substr(c_phone@4, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54533415]]}, projection=[o_custkey] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/customer.parquet:0..13212491]]}, projection=[c_custkey, c_phone, c_acctbal], predicate=Use substr(c_phone@4, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_custkey] diff --git a/testdata/expected-plans/q3.txt b/testdata/expected-plans/q3.txt index b75c633..5161cdd 100644 --- a/testdata/expected-plans/q3.txt +++ b/testdata/expected-plans/q3.txt @@ -31,14 +31,14 @@ SortExec: TopK(fetch=10), expr=[revenue@1 DESC,o_orderdate@2 ASC NULLS LAST], pr ProjectionExec: expr=[c_custkey@0 as c_custkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: c_mktsegment@1 = BUILDING - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13210681]]}, projection=[c_custkey, c_mktsegment], predicate=c_mktsegment@6 = BUILDING, pruning_predicate=CASE WHEN c_mktsegment_null_count@2 = c_mktsegment_row_count@3 THEN false ELSE c_mktsegment_min@0 <= BUILDING AND BUILDING <= c_mktsegment_max@1 END, required_guarantees=[c_mktsegment in (BUILDING)] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/customer.parquet:0..13212491]]}, projection=[c_custkey, c_mktsegment], predicate=c_mktsegment@6 = BUILDING, 
pruning_predicate=CASE WHEN c_mktsegment_null_count@2 = c_mktsegment_row_count@3 THEN false ELSE c_mktsegment_min@0 <= BUILDING AND BUILDING <= c_mktsegment_max@1 END, required_guarantees=[c_mktsegment in (BUILDING)] CoalesceBatchesExec: target_batch_size=8192 FilterExec: o_orderdate@2 < 1995-03-15 - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54533415]]}, projection=[o_orderkey, o_custkey, o_orderdate, o_shippriority], predicate=o_orderdate@4 < 1995-03-15, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@0 < 1995-03-15 END, required_guarantees=[] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_custkey, o_orderdate, o_shippriority], predicate=o_orderdate@4 < 1995-03-15, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@0 < 1995-03-15 END, required_guarantees=[] ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_shipdate@3 > 1995-03-15 - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_orderkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 > 1995-03-15, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 > 1995-03-15 END, required_guarantees=[] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 > 1995-03-15, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 > 1995-03-15 END, 
required_guarantees=[] RaySQL Plan =========== @@ -54,12 +54,12 @@ SortExec: TopK(fetch=10), expr=[revenue@1 DESC,o_orderdate@2 ASC NULLS LAST], pr ProjectionExec: expr=[c_custkey@0 as c_custkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: c_mktsegment@1 = BUILDING - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13210681]]}, projection=[c_custkey, c_mktsegment], predicate=c_mktsegment@6 = BUILDING, pruning_predicate=CASE WHEN c_mktsegment_null_count@2 = c_mktsegment_row_count@3 THEN false ELSE c_mktsegment_min@0 <= BUILDING AND BUILDING <= c_mktsegment_max@1 END, required_guarantees=[c_mktsegment in (BUILDING)] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/customer.parquet:0..13212491]]}, projection=[c_custkey, c_mktsegment], predicate=c_mktsegment@6 = BUILDING, pruning_predicate=CASE WHEN c_mktsegment_null_count@2 = c_mktsegment_row_count@3 THEN false ELSE c_mktsegment_min@0 <= BUILDING AND BUILDING <= c_mktsegment_max@1 END, required_guarantees=[c_mktsegment in (BUILDING)] CoalesceBatchesExec: target_batch_size=8192 FilterExec: o_orderdate@2 < 1995-03-15 - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54533415]]}, projection=[o_orderkey, o_custkey, o_orderdate, o_shippriority], predicate=o_orderdate@4 < 1995-03-15, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@0 < 1995-03-15 END, required_guarantees=[] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_custkey, o_orderdate, o_shippriority], predicate=o_orderdate@4 < 1995-03-15, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@0 < 1995-03-15 END, required_guarantees=[] ProjectionExec: expr=[l_orderkey@0 as l_orderkey, 
l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_shipdate@3 > 1995-03-15 - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_orderkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 > 1995-03-15, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 > 1995-03-15 END, required_guarantees=[] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 > 1995-03-15, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 > 1995-03-15 END, required_guarantees=[] diff --git a/testdata/expected-plans/q4.txt b/testdata/expected-plans/q4.txt index 675f134..e4ae1e3 100644 --- a/testdata/expected-plans/q4.txt +++ b/testdata/expected-plans/q4.txt @@ -25,11 +25,11 @@ SortExec: expr=[o_orderpriority@0 ASC NULLS LAST], preserve_partitioning=[false] ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_orderpriority@2 as o_orderpriority] CoalesceBatchesExec: target_batch_size=8192 FilterExec: o_orderdate@1 >= 1995-04-01 AND o_orderdate@1 < 1995-07-01 - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54533415]]}, projection=[o_orderkey, o_orderdate, o_orderpriority], predicate=o_orderdate@4 >= 1995-04-01 AND o_orderdate@4 < 1995-07-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1995-04-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1995-07-01 END, required_guarantees=[] + ParquetExec: file_groups={1 group: 
[[home/edmondo/Development/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_orderdate, o_orderpriority], predicate=o_orderdate@4 >= 1995-04-01 AND o_orderdate@4 < 1995-07-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1995-04-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1995-07-01 END, required_guarantees=[] ProjectionExec: expr=[l_orderkey@0 as l_orderkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_receiptdate@2 > l_commitdate@1 - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_orderkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 RaySQL Plan =========== @@ -43,9 +43,9 @@ SortExec: expr=[o_orderpriority@0 ASC NULLS LAST], preserve_partitioning=[false] ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_orderpriority@2 as o_orderpriority] CoalesceBatchesExec: target_batch_size=8192 FilterExec: o_orderdate@1 >= 1995-04-01 AND o_orderdate@1 < 1995-07-01 - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54533415]]}, projection=[o_orderkey, o_orderdate, o_orderpriority], predicate=o_orderdate@4 >= 1995-04-01 AND o_orderdate@4 < 1995-07-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1995-04-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1995-07-01 END, required_guarantees=[] + ParquetExec: file_groups={1 group: 
[[home/edmondo/Development/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_orderdate, o_orderpriority], predicate=o_orderdate@4 >= 1995-04-01 AND o_orderdate@4 < 1995-07-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1995-04-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1995-07-01 END, required_guarantees=[] ProjectionExec: expr=[l_orderkey@0 as l_orderkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_receiptdate@2 > l_commitdate@1 - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_orderkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 diff --git a/testdata/expected-plans/q5.txt b/testdata/expected-plans/q5.txt index 8cfea51..f5c9022 100644 --- a/testdata/expected-plans/q5.txt +++ b/testdata/expected-plans/q5.txt @@ -36,25 +36,25 @@ SortExec: expr=[revenue@1 DESC], preserve_partitioning=[false] ProjectionExec: expr=[r_regionkey@0 as r_regionkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: r_name@1 = AFRICA - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = AFRICA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= AFRICA AND AFRICA <= r_name_max@1 END, required_guarantees=[r_name in (AFRICA)] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/region.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = AFRICA, pruning_predicate=CASE WHEN 
r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= AFRICA AND AFRICA <= r_name_max@1 END, required_guarantees=[r_name in (AFRICA)] ProjectionExec: expr=[l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, n_name@0 as n_name, n_regionkey@1 as n_regionkey] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[n_name@1, n_regionkey@2, l_extendedprice@3, l_discount@4] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name, n_regionkey] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name, n_regionkey] ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@0 as s_nationkey] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1), (s_nationkey@1, c_nationkey@0)], projection=[s_nationkey@1, l_extendedprice@4, l_discount@5] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(o_orderkey@1, l_orderkey@0)], projection=[c_nationkey@0, l_suppkey@3, l_extendedprice@4, l_discount@5] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_nationkey@1, o_orderkey@2] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13210681]]}, projection=[c_custkey, c_nationkey] + ParquetExec: file_groups={1 group: 
[[home/edmondo/Development/datafusion-ray/data/customer.parquet:0..13212491]]}, projection=[c_custkey, c_nationkey] ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: o_orderdate@2 >= 1994-01-01 AND o_orderdate@2 < 1995-01-01 - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54533415]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1994-01-01 AND o_orderdate@4 < 1995-01-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1994-01-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1995-01-01 END, required_guarantees=[] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1994-01-01 AND o_orderdate@4 < 1995-01-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1994-01-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1995-01-01 END, required_guarantees=[] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount] RaySQL Plan =========== @@ -68,23 +68,23 @@ SortExec: expr=[revenue@1 DESC], preserve_partitioning=[false] ProjectionExec: expr=[r_regionkey@0 as r_regionkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: r_name@1 = AFRICA - ParquetExec: file_groups={1 group: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = AFRICA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= AFRICA AND AFRICA <= r_name_max@1 END, required_guarantees=[r_name in (AFRICA)] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/region.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = AFRICA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= AFRICA AND AFRICA <= r_name_max@1 END, required_guarantees=[r_name in (AFRICA)] ProjectionExec: expr=[l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, n_name@0 as n_name, n_regionkey@1 as n_regionkey] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[n_name@1, n_regionkey@2, l_extendedprice@3, l_discount@4] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name, n_regionkey] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name, n_regionkey] ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@0 as s_nationkey] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1), (s_nationkey@1, c_nationkey@0)], projection=[s_nationkey@1, l_extendedprice@4, l_discount@5] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: 
mode=CollectLeft, join_type=Inner, on=[(o_orderkey@1, l_orderkey@0)], projection=[c_nationkey@0, l_suppkey@3, l_extendedprice@4, l_discount@5] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_nationkey@1, o_orderkey@2] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13210681]]}, projection=[c_custkey, c_nationkey] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/customer.parquet:0..13212491]]}, projection=[c_custkey, c_nationkey] ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: o_orderdate@2 >= 1994-01-01 AND o_orderdate@2 < 1995-01-01 - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54533415]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1994-01-01 AND o_orderdate@4 < 1995-01-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1994-01-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1995-01-01 END, required_guarantees=[] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1994-01-01 AND o_orderdate@4 < 1995-01-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1994-01-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1995-01-01 END, 
required_guarantees=[] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount] diff --git a/testdata/expected-plans/q6.txt b/testdata/expected-plans/q6.txt index 7d6ff2d..50f2026 100644 --- a/testdata/expected-plans/q6.txt +++ b/testdata/expected-plans/q6.txt @@ -15,7 +15,7 @@ ProjectionExec: expr=[sum(lineitem.l_extendedprice * lineitem.l_discount)@0 as r ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_shipdate@3 >= 1994-01-01 AND l_shipdate@3 < 1995-01-01 AND l_discount@2 >= Some(3),11,2 AND l_discount@2 <= Some(5),11,2 AND l_quantity@0 < Some(2400),11,2 - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_quantity, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1994-01-01 AND l_shipdate@10 < 1995-01-01 AND l_discount@6 >= Some(3),11,2 AND l_discount@6 <= Some(5),11,2 AND l_quantity@4 < Some(2400),11,2, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1994-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1995-01-01 END AND CASE WHEN l_discount_null_count@5 = l_discount_row_count@6 THEN false ELSE l_discount_max@4 >= Some(3),11,2 END AND CASE WHEN l_discount_null_count@5 = l_discount_row_count@6 THEN false ELSE l_discount_min@7 <= Some(5),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@8 < Some(2400),11,2 END, required_guarantees=[] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_quantity, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1994-01-01 AND 
l_shipdate@10 < 1995-01-01 AND l_discount@6 >= Some(3),11,2 AND l_discount@6 <= Some(5),11,2 AND l_quantity@4 < Some(2400),11,2, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1994-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1995-01-01 END AND CASE WHEN l_discount_null_count@5 = l_discount_row_count@6 THEN false ELSE l_discount_max@4 >= Some(3),11,2 END AND CASE WHEN l_discount_null_count@5 = l_discount_row_count@6 THEN false ELSE l_discount_min@7 <= Some(5),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@8 < Some(2400),11,2 END, required_guarantees=[] RaySQL Plan =========== @@ -26,5 +26,5 @@ ProjectionExec: expr=[sum(lineitem.l_extendedprice * lineitem.l_discount)@0 as r ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_shipdate@3 >= 1994-01-01 AND l_shipdate@3 < 1995-01-01 AND l_discount@2 >= Some(3),11,2 AND l_discount@2 <= Some(5),11,2 AND l_quantity@0 < Some(2400),11,2 - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_quantity, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1994-01-01 AND l_shipdate@10 < 1995-01-01 AND l_discount@6 >= Some(3),11,2 AND l_discount@6 <= Some(5),11,2 AND l_quantity@4 < Some(2400),11,2, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1994-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1995-01-01 END AND CASE WHEN l_discount_null_count@5 = l_discount_row_count@6 THEN false ELSE l_discount_max@4 >= Some(3),11,2 END AND CASE WHEN l_discount_null_count@5 = l_discount_row_count@6 THEN false ELSE l_discount_min@7 <= 
Some(5),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@8 < Some(2400),11,2 END, required_guarantees=[] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_quantity, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1994-01-01 AND l_shipdate@10 < 1995-01-01 AND l_discount@6 >= Some(3),11,2 AND l_discount@6 <= Some(5),11,2 AND l_quantity@4 < Some(2400),11,2, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1994-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1995-01-01 END AND CASE WHEN l_discount_null_count@5 = l_discount_row_count@6 THEN false ELSE l_discount_max@4 >= Some(3),11,2 END AND CASE WHEN l_discount_null_count@5 = l_discount_row_count@6 THEN false ELSE l_discount_min@7 <= Some(5),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@8 < Some(2400),11,2 END, required_guarantees=[] diff --git a/testdata/expected-plans/q7.txt b/testdata/expected-plans/q7.txt index 2d820c5..6d64219 100644 --- a/testdata/expected-plans/q7.txt +++ b/testdata/expected-plans/q7.txt @@ -38,26 +38,26 @@ SortExec: expr=[supp_nation@0 ASC NULLS LAST,cust_nation@1 ASC NULLS LAST,l_year HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, c_nationkey@3)], filter=n_name@0 = GERMANY AND n_name@1 = IRAQ OR n_name@0 = IRAQ AND n_name@1 = GERMANY, projection=[n_name@1, l_extendedprice@2, l_discount@3, l_shipdate@4, n_name@6] CoalesceBatchesExec: target_batch_size=8192 FilterExec: n_name@1 = IRAQ OR n_name@1 = GERMANY - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = IRAQ OR n_name@1 = GERMANY, pruning_predicate=CASE WHEN 
n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= IRAQ AND IRAQ <= n_name_max@1 END OR CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= GERMANY AND GERMANY <= n_name_max@1 END, required_guarantees=[n_name in (IRAQ, GERMANY)] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = IRAQ OR n_name@1 = GERMANY, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= IRAQ AND IRAQ <= n_name_max@1 END OR CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= GERMANY AND GERMANY <= n_name_max@1 END, required_guarantees=[n_name in (GERMANY, IRAQ)] ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, l_shipdate@3 as l_shipdate, c_nationkey@4 as c_nationkey, n_name@0 as n_name] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@0)], projection=[n_name@1, l_extendedprice@3, l_discount@4, l_shipdate@5, c_nationkey@6] CoalesceBatchesExec: target_batch_size=8192 FilterExec: n_name@1 = GERMANY OR n_name@1 = IRAQ - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = GERMANY OR n_name@1 = IRAQ, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= GERMANY AND GERMANY <= n_name_max@1 END OR CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= IRAQ AND IRAQ <= n_name_max@1 END, required_guarantees=[n_name in (GERMANY, IRAQ)] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = GERMANY OR n_name@1 = IRAQ, pruning_predicate=CASE WHEN n_name_null_count@2 = 
n_name_row_count@3 THEN false ELSE n_name_min@0 <= GERMANY AND GERMANY <= n_name_max@1 END OR CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= IRAQ AND IRAQ <= n_name_max@1 END, required_guarantees=[n_name in (GERMANY, IRAQ)] ProjectionExec: expr=[s_nationkey@1 as s_nationkey, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, l_shipdate@4 as l_shipdate, c_nationkey@0 as c_nationkey] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(c_custkey@0, o_custkey@4)], projection=[c_nationkey@1, s_nationkey@2, l_extendedprice@3, l_discount@4, l_shipdate@5] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13210681]]}, projection=[c_custkey, c_nationkey] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/customer.parquet:0..13212491]]}, projection=[c_custkey, c_nationkey] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(l_orderkey@1, o_orderkey@0)], projection=[s_nationkey@0, l_extendedprice@2, l_discount@3, l_shipdate@4, o_custkey@6] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1)], projection=[s_nationkey@1, l_orderkey@2, l_extendedprice@4, l_discount@5, l_shipdate@6] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_shipdate@4 >= 1995-01-01 AND l_shipdate@4 <= 1996-12-31 - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount, l_shipdate], 
predicate=l_shipdate@10 >= 1995-01-01 AND l_shipdate@10 <= 1996-12-31, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1995-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 <= 1996-12-31 END, required_guarantees=[] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54533415]]}, projection=[o_orderkey, o_custkey] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1995-01-01 AND l_shipdate@10 <= 1996-12-31, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1995-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 <= 1996-12-31 END, required_guarantees=[] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_custkey] RaySQL Plan =========== @@ -71,24 +71,24 @@ SortExec: expr=[supp_nation@0 ASC NULLS LAST,cust_nation@1 ASC NULLS LAST,l_year HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, c_nationkey@3)], filter=n_name@0 = GERMANY AND n_name@1 = IRAQ OR n_name@0 = IRAQ AND n_name@1 = GERMANY, projection=[n_name@1, l_extendedprice@2, l_discount@3, l_shipdate@4, n_name@6] CoalesceBatchesExec: target_batch_size=8192 FilterExec: n_name@1 = IRAQ OR n_name@1 = GERMANY - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = IRAQ OR n_name@1 = GERMANY, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= IRAQ AND IRAQ <= n_name_max@1 END OR CASE WHEN 
n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= GERMANY AND GERMANY <= n_name_max@1 END, required_guarantees=[n_name in (IRAQ, GERMANY)] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = IRAQ OR n_name@1 = GERMANY, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= IRAQ AND IRAQ <= n_name_max@1 END OR CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= GERMANY AND GERMANY <= n_name_max@1 END, required_guarantees=[n_name in (GERMANY, IRAQ)] ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, l_shipdate@3 as l_shipdate, c_nationkey@4 as c_nationkey, n_name@0 as n_name] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@0)], projection=[n_name@1, l_extendedprice@3, l_discount@4, l_shipdate@5, c_nationkey@6] CoalesceBatchesExec: target_batch_size=8192 FilterExec: n_name@1 = GERMANY OR n_name@1 = IRAQ - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = GERMANY OR n_name@1 = IRAQ, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= GERMANY AND GERMANY <= n_name_max@1 END OR CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= IRAQ AND IRAQ <= n_name_max@1 END, required_guarantees=[n_name in (GERMANY, IRAQ)] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = GERMANY OR n_name@1 = IRAQ, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= GERMANY AND GERMANY <= n_name_max@1 END OR CASE WHEN n_name_null_count@2 = 
n_name_row_count@3 THEN false ELSE n_name_min@0 <= IRAQ AND IRAQ <= n_name_max@1 END, required_guarantees=[n_name in (GERMANY, IRAQ)] ProjectionExec: expr=[s_nationkey@1 as s_nationkey, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, l_shipdate@4 as l_shipdate, c_nationkey@0 as c_nationkey] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(c_custkey@0, o_custkey@4)], projection=[c_nationkey@1, s_nationkey@2, l_extendedprice@3, l_discount@4, l_shipdate@5] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13210681]]}, projection=[c_custkey, c_nationkey] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/customer.parquet:0..13212491]]}, projection=[c_custkey, c_nationkey] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(l_orderkey@1, o_orderkey@0)], projection=[s_nationkey@0, l_extendedprice@2, l_discount@3, l_shipdate@4, o_custkey@6] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1)], projection=[s_nationkey@1, l_orderkey@2, l_extendedprice@4, l_discount@5, l_shipdate@6] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_shipdate@4 >= 1995-01-01 AND l_shipdate@4 <= 1996-12-31 - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1995-01-01 AND l_shipdate@10 <= 1996-12-31, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = 
l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1995-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 <= 1996-12-31 END, required_guarantees=[] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54533415]]}, projection=[o_orderkey, o_custkey] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1995-01-01 AND l_shipdate@10 <= 1996-12-31, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1995-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 <= 1996-12-31 END, required_guarantees=[] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_custkey] diff --git a/testdata/expected-plans/q8.txt b/testdata/expected-plans/q8.txt index 0f2a05c..e83a9c1 100644 --- a/testdata/expected-plans/q8.txt +++ b/testdata/expected-plans/q8.txt @@ -47,36 +47,36 @@ SortExec: expr=[o_year@0 ASC NULLS LAST], preserve_partitioning=[false] ProjectionExec: expr=[r_regionkey@0 as r_regionkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: r_name@1 = MIDDLE EAST - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = MIDDLE EAST, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= MIDDLE EAST AND MIDDLE EAST <= r_name_max@1 END, required_guarantees=[r_name in (MIDDLE EAST)] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/region.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = MIDDLE EAST, 
pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= MIDDLE EAST AND MIDDLE EAST <= r_name_max@1 END, required_guarantees=[r_name in (MIDDLE EAST)] ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, o_orderdate@3 as o_orderdate, n_regionkey@4 as n_regionkey, n_name@0 as n_name] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[n_name@1, l_extendedprice@2, l_discount@3, o_orderdate@5, n_regionkey@6] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name] ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@3 as s_nationkey, o_orderdate@4 as o_orderdate, n_regionkey@0 as n_regionkey] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, c_nationkey@4)], projection=[n_regionkey@1, l_extendedprice@2, l_discount@3, s_nationkey@4, o_orderdate@5] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_regionkey] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_regionkey] ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@3 as s_nationkey, o_orderdate@4 as o_orderdate, c_nationkey@0 as c_nationkey] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(c_custkey@0, o_custkey@3)], projection=[c_nationkey@1, l_extendedprice@2, l_discount@3, s_nationkey@4, o_orderdate@6] - ParquetExec: file_groups={1 group: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13210681]]}, projection=[c_custkey, c_nationkey] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/customer.parquet:0..13212491]]}, projection=[c_custkey, c_nationkey] ProjectionExec: expr=[l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@4 as s_nationkey, o_custkey@0 as o_custkey, o_orderdate@1 as o_orderdate] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_custkey@1, o_orderdate@2, l_extendedprice@4, l_discount@5, s_nationkey@6] CoalesceBatchesExec: target_batch_size=8192 FilterExec: o_orderdate@2 >= 1995-01-01 AND o_orderdate@2 <= 1996-12-31 - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54533415]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1995-01-01 AND o_orderdate@4 <= 1996-12-31, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1995-01-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 <= 1996-12-31 END, required_guarantees=[] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1995-01-01 AND o_orderdate@4 <= 1996-12-31, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1995-01-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 <= 1996-12-31 END, required_guarantees=[] ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@0 as s_nationkey] CoalesceBatchesExec: target_batch_size=8192 
HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1)], projection=[s_nationkey@1, l_orderkey@2, l_extendedprice@4, l_discount@5] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, l_partkey@1)], projection=[l_orderkey@1, l_suppkey@3, l_extendedprice@4, l_discount@5] ProjectionExec: expr=[p_partkey@0 as p_partkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: p_type@1 = LARGE PLATED STEEL - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_type], predicate=p_type@4 = LARGE PLATED STEEL, pruning_predicate=CASE WHEN p_type_null_count@2 = p_type_row_count@3 THEN false ELSE p_type_min@0 <= LARGE PLATED STEEL AND LARGE PLATED STEEL <= p_type_max@1 END, required_guarantees=[p_type in (LARGE PLATED STEEL)] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_orderkey, l_partkey, l_suppkey, l_extendedprice, l_discount] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_type], predicate=p_type@4 = LARGE PLATED STEEL, pruning_predicate=CASE WHEN p_type_null_count@2 = p_type_row_count@3 THEN false ELSE p_type_min@0 <= LARGE PLATED STEEL AND LARGE PLATED STEEL <= p_type_max@1 END, required_guarantees=[p_type in (LARGE PLATED STEEL)] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_partkey, l_suppkey, l_extendedprice, l_discount] RaySQL Plan =========== @@ -91,34 +91,34 
@@ SortExec: expr=[o_year@0 ASC NULLS LAST], preserve_partitioning=[false] ProjectionExec: expr=[r_regionkey@0 as r_regionkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: r_name@1 = MIDDLE EAST - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = MIDDLE EAST, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= MIDDLE EAST AND MIDDLE EAST <= r_name_max@1 END, required_guarantees=[r_name in (MIDDLE EAST)] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/region.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = MIDDLE EAST, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= MIDDLE EAST AND MIDDLE EAST <= r_name_max@1 END, required_guarantees=[r_name in (MIDDLE EAST)] ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, o_orderdate@3 as o_orderdate, n_regionkey@4 as n_regionkey, n_name@0 as n_name] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[n_name@1, l_extendedprice@2, l_discount@3, o_orderdate@5, n_regionkey@6] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name] ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@3 as s_nationkey, o_orderdate@4 as o_orderdate, n_regionkey@0 as n_regionkey] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, c_nationkey@4)], projection=[n_regionkey@1, l_extendedprice@2, l_discount@3, s_nationkey@4, 
o_orderdate@5] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_regionkey] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_regionkey] ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@3 as s_nationkey, o_orderdate@4 as o_orderdate, c_nationkey@0 as c_nationkey] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(c_custkey@0, o_custkey@3)], projection=[c_nationkey@1, l_extendedprice@2, l_discount@3, s_nationkey@4, o_orderdate@6] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13210681]]}, projection=[c_custkey, c_nationkey] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/customer.parquet:0..13212491]]}, projection=[c_custkey, c_nationkey] ProjectionExec: expr=[l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@4 as s_nationkey, o_custkey@0 as o_custkey, o_orderdate@1 as o_orderdate] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_custkey@1, o_orderdate@2, l_extendedprice@4, l_discount@5, s_nationkey@6] CoalesceBatchesExec: target_batch_size=8192 FilterExec: o_orderdate@2 >= 1995-01-01 AND o_orderdate@2 <= 1996-12-31 - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54533415]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1995-01-01 AND o_orderdate@4 <= 1996-12-31, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1995-01-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 <= 
1996-12-31 END, required_guarantees=[] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1995-01-01 AND o_orderdate@4 <= 1996-12-31, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1995-01-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 <= 1996-12-31 END, required_guarantees=[] ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@0 as s_nationkey] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1)], projection=[s_nationkey@1, l_orderkey@2, l_extendedprice@4, l_discount@5] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, l_partkey@1)], projection=[l_orderkey@1, l_suppkey@3, l_extendedprice@4, l_discount@5] ProjectionExec: expr=[p_partkey@0 as p_partkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: p_type@1 = LARGE PLATED STEEL - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_type], predicate=p_type@4 = LARGE PLATED STEEL, pruning_predicate=CASE WHEN p_type_null_count@2 = p_type_row_count@3 THEN false ELSE p_type_min@0 <= LARGE PLATED STEEL AND LARGE PLATED STEEL <= p_type_max@1 END, required_guarantees=[p_type in (LARGE PLATED STEEL)] - ParquetExec: file_groups={1 group: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_orderkey, l_partkey, l_suppkey, l_extendedprice, l_discount] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_type], predicate=p_type@4 = LARGE PLATED STEEL, pruning_predicate=CASE WHEN p_type_null_count@2 = p_type_row_count@3 THEN false ELSE p_type_min@0 <= LARGE PLATED STEEL AND LARGE PLATED STEEL <= p_type_max@1 END, required_guarantees=[p_type in (LARGE PLATED STEEL)] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_partkey, l_suppkey, l_extendedprice, l_discount] diff --git a/testdata/expected-plans/q9.txt b/testdata/expected-plans/q9.txt index 886476a..203a47e 100644 --- a/testdata/expected-plans/q9.txt +++ b/testdata/expected-plans/q9.txt @@ -33,26 +33,26 @@ SortExec: expr=[nation@0 ASC NULLS LAST,o_year@1 DESC], preserve_partitioning=[f ProjectionExec: expr=[n_name@0 as nation, date_part(YEAR, o_orderdate@5) as o_year, l_extendedprice@2 * (Some(1),20,0 - l_discount@3) - ps_supplycost@4 * l_quantity@1 as amount] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@3)], projection=[n_name@1, l_quantity@2, l_extendedprice@3, l_discount@4, ps_supplycost@6, o_orderdate@7] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name] ProjectionExec: expr=[l_quantity@1 as l_quantity, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@4 as s_nationkey, ps_supplycost@5 as ps_supplycost, o_orderdate@0 as o_orderdate] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: 
mode=CollectLeft, join_type=Inner, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_orderdate@1, l_quantity@3, l_extendedprice@4, l_discount@5, s_nationkey@6, ps_supplycost@7] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54533415]]}, projection=[o_orderkey, o_orderdate] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_orderdate] ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_quantity@2 as l_quantity, l_extendedprice@3 as l_extendedprice, l_discount@4 as l_discount, s_nationkey@5 as s_nationkey, ps_supplycost@0 as ps_supplycost] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(ps_suppkey@1, l_suppkey@2), (ps_partkey@0, l_partkey@1)], projection=[ps_supplycost@2, l_orderkey@3, l_quantity@6, l_extendedprice@7, l_discount@8, s_nationkey@9] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet:0..41093580]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/partsupp.parquet:0..41092331]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_partkey@2 as l_partkey, l_suppkey@3 as l_suppkey, l_quantity@4 as l_quantity, l_extendedprice@5 as l_extendedprice, l_discount@6 as l_discount, s_nationkey@0 as s_nationkey] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, l_suppkey@2)], projection=[s_nationkey@1, l_orderkey@2, l_partkey@3, l_suppkey@4, l_quantity@5, l_extendedprice@6, l_discount@7] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] + ParquetExec: file_groups={1 group: 
[[home/edmondo/Development/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, l_partkey@1)], projection=[l_orderkey@1, l_partkey@2, l_suppkey@3, l_quantity@4, l_extendedprice@5, l_discount@6] ProjectionExec: expr=[p_partkey@0 as p_partkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: p_name@1 LIKE %moccasin% - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_name], predicate=p_name@1 LIKE %moccasin% - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_orderkey, l_partkey, l_suppkey, l_quantity, l_extendedprice, l_discount] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_name], predicate=p_name@1 LIKE %moccasin% + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_partkey, l_suppkey, l_quantity, l_extendedprice, l_discount] RaySQL Plan =========== @@ -64,24 +64,24 @@ SortExec: expr=[nation@0 ASC NULLS LAST,o_year@1 DESC], preserve_partitioning=[f ProjectionExec: expr=[n_name@0 as nation, date_part(YEAR, o_orderdate@5) as o_year, l_extendedprice@2 * (Some(1),20,0 - l_discount@3) - ps_supplycost@4 * l_quantity@1 as amount] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@3)], projection=[n_name@1, l_quantity@2, l_extendedprice@3, l_discount@4, ps_supplycost@6, o_orderdate@7] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet]]}, 
projection=[n_nationkey, n_name] ProjectionExec: expr=[l_quantity@1 as l_quantity, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@4 as s_nationkey, ps_supplycost@5 as ps_supplycost, o_orderdate@0 as o_orderdate] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_orderdate@1, l_quantity@3, l_extendedprice@4, l_discount@5, s_nationkey@6, ps_supplycost@7] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54533415]]}, projection=[o_orderkey, o_orderdate] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_orderdate] ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_quantity@2 as l_quantity, l_extendedprice@3 as l_extendedprice, l_discount@4 as l_discount, s_nationkey@5 as s_nationkey, ps_supplycost@0 as ps_supplycost] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(ps_suppkey@1, l_suppkey@2), (ps_partkey@0, l_partkey@1)], projection=[ps_supplycost@2, l_orderkey@3, l_quantity@6, l_extendedprice@7, l_discount@8, s_nationkey@9] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet:0..41093580]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/partsupp.parquet:0..41092331]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_partkey@2 as l_partkey, l_suppkey@3 as l_suppkey, l_quantity@4 as l_quantity, l_extendedprice@5 as l_extendedprice, l_discount@6 as l_discount, s_nationkey@0 as s_nationkey] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, l_suppkey@2)], projection=[s_nationkey@1, l_orderkey@2, 
l_partkey@3, l_suppkey@4, l_quantity@5, l_extendedprice@6, l_discount@7] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, l_partkey@1)], projection=[l_orderkey@1, l_partkey@2, l_suppkey@3, l_quantity@4, l_extendedprice@5, l_discount@6] ProjectionExec: expr=[p_partkey@0 as p_partkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: p_name@1 LIKE %moccasin% - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_name], predicate=p_name@1 LIKE %moccasin% - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208325896]]}, projection=[l_orderkey, l_partkey, l_suppkey, l_quantity, l_extendedprice, l_discount] + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_name], predicate=p_name@1 LIKE %moccasin% + ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_partkey, l_suppkey, l_quantity, l_extendedprice, l_discount] diff --git a/tpch/tpchgen.py b/tpch/tpchgen.py index e1822ab..2425916 100644 --- a/tpch/tpchgen.py +++ b/tpch/tpchgen.py @@ -174,7 +174,7 @@ def generate_tpch(scale_factor: int, partitions: int): start_time = time.time() docker_cmd = os.getenv("DOCKER_CMD", "docker") if partitions == 1: - command = f"{docker_cmd} run -v `pwd`/data:/data -t --rm ghcr.io/scalytics/tpch-docker:main -vf -s {scale_factor}" + command = f"{docker_cmd} run -v `pwd`/data:/data -t --rm ghcr.io/scalytics/tpch-docker:main -vf -s {scale_factor} -r 
1" run_and_log_output(command, "/tmp/tpchgen.log") else: max_threads = os.cpu_count() @@ -182,7 +182,7 @@ def generate_tpch(scale_factor: int, partitions: int): # List of commands to run commands = [ ( - f"{docker_cmd} run -v `pwd`/data:/data -t --rm ghcr.io/scalytics/tpch-docker:main -vf -s {scale_factor} -C {partitions} -S {part}", + f"{docker_cmd} run -v `pwd`/data:/data -t --rm ghcr.io/scalytics/tpch-docker:main -vf -s {scale_factor} -C {partitions} -S {part} -r 1", f"/tmp/tpchgen-part{part}.log", ) for part in range(1, partitions + 1) From 434f34274bd4e6086dcc9849c82172b14aa49855 Mon Sep 17 00:00:00 2001 From: Edmondo Porcu Date: Thu, 3 Oct 2024 16:49:32 -0400 Subject: [PATCH 07/14] One partition only --- .github/workflows/rust.yml | 2 +- src/planner.rs | 2 +- testdata/expected-plans/q1.txt | 4 ++-- testdata/expected-plans/q10.txt | 16 +++++++-------- testdata/expected-plans/q11.txt | 24 +++++++++++----------- testdata/expected-plans/q12.txt | 8 ++++---- testdata/expected-plans/q13.txt | 8 ++++---- testdata/expected-plans/q14.txt | 8 ++++---- testdata/expected-plans/q16.txt | 12 +++++------ testdata/expected-plans/q17.txt | 12 +++++------ testdata/expected-plans/q18.txt | 16 +++++++-------- testdata/expected-plans/q19.txt | 8 ++++---- testdata/expected-plans/q2.txt | 36 ++++++++++++++++----------------- testdata/expected-plans/q20.txt | 20 +++++++++--------- testdata/expected-plans/q21.txt | 24 +++++++++++----------- testdata/expected-plans/q22.txt | 12 +++++------ testdata/expected-plans/q3.txt | 12 +++++------ testdata/expected-plans/q4.txt | 8 ++++---- testdata/expected-plans/q5.txt | 24 +++++++++++----------- testdata/expected-plans/q6.txt | 4 ++-- testdata/expected-plans/q7.txt | 24 +++++++++++----------- testdata/expected-plans/q8.txt | 32 ++++++++++++++--------------- testdata/expected-plans/q9.txt | 24 +++++++++++----------- 23 files changed, 170 insertions(+), 170 deletions(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 
5f355f2..3e59e61 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -8,7 +8,7 @@ env: CARGO_TERM_COLOR: always PYTHON_VERSION: 3.9 TPCH_SAMPLING_RATE: "0.1" - TPCH_TEST_PARTITIONS: "2" + TPCH_TEST_PARTITIONS: "1" TPCH_DATA_PATH: "data" jobs: diff --git a/src/planner.rs b/src/planner.rs index 40934fe..5c58663 100644 --- a/src/planner.rs +++ b/src/planner.rs @@ -384,7 +384,7 @@ mod test { let data_path = env::var("TPCH_DATA_PATH")?; let file = format!("testdata/queries/q{n}.sql"); let sql = fs::read_to_string(&file)?; - let config = SessionConfig::new().with_target_partitions(1); + let config = SessionConfig::new().with_target_partitions(4); let ctx = SessionContext::with_config(config); let tables = &[ "customer", "lineitem", "nation", "orders", "part", "partsupp", "region", "supplier", diff --git a/testdata/expected-plans/q1.txt b/testdata/expected-plans/q1.txt index aaf26ad..1b9c6a5 100644 --- a/testdata/expected-plans/q1.txt +++ b/testdata/expected-plans/q1.txt @@ -17,7 +17,7 @@ SortExec: expr=[l_returnflag@0 ASC NULLS LAST,l_linestatus@1 ASC NULLS LAST], pr ProjectionExec: expr=[l_extendedprice@1 * (Some(1),20,0 - l_discount@2) as __common_expr_2, l_quantity@0 as l_quantity, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, l_tax@3 as l_tax, l_returnflag@4 as l_returnflag, l_linestatus@5 as l_linestatus] CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_shipdate@6 <= 1998-09-24 - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate], predicate=l_shipdate@10 <= 1998-09-24, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@0 <= 1998-09-24 END, required_guarantees=[] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, 
projection=[l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate], predicate=l_shipdate@10 <= 1998-09-24, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@0 <= 1998-09-24 END, required_guarantees=[] RaySQL Plan =========== @@ -29,5 +29,5 @@ SortExec: expr=[l_returnflag@0 ASC NULLS LAST,l_linestatus@1 ASC NULLS LAST], pr ProjectionExec: expr=[l_extendedprice@1 * (Some(1),20,0 - l_discount@2) as __common_expr_2, l_quantity@0 as l_quantity, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, l_tax@3 as l_tax, l_returnflag@4 as l_returnflag, l_linestatus@5 as l_linestatus] CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_shipdate@6 <= 1998-09-24 - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate], predicate=l_shipdate@10 <= 1998-09-24, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@0 <= 1998-09-24 END, required_guarantees=[] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate], predicate=l_shipdate@10 <= 1998-09-24, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@0 <= 1998-09-24 END, required_guarantees=[] diff --git a/testdata/expected-plans/q10.txt b/testdata/expected-plans/q10.txt index 417763d..b6e9f3f 100644 --- a/testdata/expected-plans/q10.txt +++ b/testdata/expected-plans/q10.txt @@ -29,20 +29,20 @@ SortExec: TopK(fetch=20), expr=[revenue@2 DESC], preserve_partitioning=[false] ProjectionExec: expr=[c_custkey@1 as c_custkey, c_name@2 as c_name, c_address@3 as c_address, c_phone@4 as c_phone, 
c_acctbal@5 as c_acctbal, c_comment@6 as c_comment, l_extendedprice@7 as l_extendedprice, l_discount@8 as l_discount, n_name@0 as n_name] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, c_nationkey@3)], projection=[n_name@1, c_custkey@2, c_name@3, c_address@4, c_phone@6, c_acctbal@7, c_comment@8, l_extendedprice@9, l_discount@10] - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(o_orderkey@7, l_orderkey@0)], projection=[c_custkey@0, c_name@1, c_address@2, c_nationkey@3, c_phone@4, c_acctbal@5, c_comment@6, l_extendedprice@9, l_discount@10] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, c_name@1, c_address@2, c_nationkey@3, c_phone@4, c_acctbal@5, c_comment@6, o_orderkey@7] - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/customer.parquet:0..13212491]]}, projection=[c_custkey, c_name, c_address, c_nationkey, c_phone, c_acctbal, c_comment] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13212491]]}, projection=[c_custkey, c_name, c_address, c_nationkey, c_phone, c_acctbal, c_comment] ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: o_orderdate@2 >= 1993-07-01 AND o_orderdate@2 < 1993-10-01 - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1993-07-01 AND 
o_orderdate@4 < 1993-10-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1993-07-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1993-10-01 END, required_guarantees=[] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1993-07-01 AND o_orderdate@4 < 1993-10-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1993-07-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1993-10-01 END, required_guarantees=[] ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_returnflag@3 = R - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_extendedprice, l_discount, l_returnflag], predicate=l_returnflag@8 = R, pruning_predicate=CASE WHEN l_returnflag_null_count@2 = l_returnflag_row_count@3 THEN false ELSE l_returnflag_min@0 <= R AND R <= l_returnflag_max@1 END, required_guarantees=[l_returnflag in (R)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_extendedprice, l_discount, l_returnflag], predicate=l_returnflag@8 = R, pruning_predicate=CASE WHEN l_returnflag_null_count@2 = l_returnflag_row_count@3 THEN false ELSE l_returnflag_min@0 <= R AND R <= l_returnflag_max@1 END, required_guarantees=[l_returnflag in (R)] RaySQL Plan =========== @@ -54,18 +54,18 @@ SortExec: TopK(fetch=20), expr=[revenue@2 DESC], preserve_partitioning=[false] ProjectionExec: expr=[c_custkey@1 as 
c_custkey, c_name@2 as c_name, c_address@3 as c_address, c_phone@4 as c_phone, c_acctbal@5 as c_acctbal, c_comment@6 as c_comment, l_extendedprice@7 as l_extendedprice, l_discount@8 as l_discount, n_name@0 as n_name] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, c_nationkey@3)], projection=[n_name@1, c_custkey@2, c_name@3, c_address@4, c_phone@6, c_acctbal@7, c_comment@8, l_extendedprice@9, l_discount@10] - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(o_orderkey@7, l_orderkey@0)], projection=[c_custkey@0, c_name@1, c_address@2, c_nationkey@3, c_phone@4, c_acctbal@5, c_comment@6, l_extendedprice@9, l_discount@10] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, c_name@1, c_address@2, c_nationkey@3, c_phone@4, c_acctbal@5, c_comment@6, o_orderkey@7] - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/customer.parquet:0..13212491]]}, projection=[c_custkey, c_name, c_address, c_nationkey, c_phone, c_acctbal, c_comment] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13212491]]}, projection=[c_custkey, c_name, c_address, c_nationkey, c_phone, c_acctbal, c_comment] ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: o_orderdate@2 >= 1993-07-01 AND o_orderdate@2 < 1993-10-01 - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, 
o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1993-07-01 AND o_orderdate@4 < 1993-10-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1993-07-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1993-10-01 END, required_guarantees=[] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1993-07-01 AND o_orderdate@4 < 1993-10-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1993-07-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1993-10-01 END, required_guarantees=[] ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_returnflag@3 = R - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_extendedprice, l_discount, l_returnflag], predicate=l_returnflag@8 = R, pruning_predicate=CASE WHEN l_returnflag_null_count@2 = l_returnflag_row_count@3 THEN false ELSE l_returnflag_min@0 <= R AND R <= l_returnflag_max@1 END, required_guarantees=[l_returnflag in (R)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_extendedprice, l_discount, l_returnflag], predicate=l_returnflag@8 = R, pruning_predicate=CASE WHEN l_returnflag_null_count@2 = l_returnflag_row_count@3 THEN false ELSE l_returnflag_min@0 <= R AND R <= l_returnflag_max@1 END, required_guarantees=[l_returnflag in (R)] diff --git a/testdata/expected-plans/q11.txt b/testdata/expected-plans/q11.txt index 
abfb957..5b6c7ae 100644 --- a/testdata/expected-plans/q11.txt +++ b/testdata/expected-plans/q11.txt @@ -40,24 +40,24 @@ SortExec: expr=[value@1 DESC], preserve_partitioning=[false] ProjectionExec: expr=[n_nationkey@0 as n_nationkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: n_name@1 = ALGERIA - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ALGERIA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ALGERIA AND ALGERIA <= n_name_max@1 END, required_guarantees=[n_name in (ALGERIA)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ALGERIA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ALGERIA AND ALGERIA <= n_name_max@1 END, required_guarantees=[n_name in (ALGERIA)] ProjectionExec: expr=[ps_availqty@1 as ps_availqty, ps_supplycost@2 as ps_supplycost, s_nationkey@0 as s_nationkey] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@0)], projection=[s_nationkey@1, ps_availqty@3, ps_supplycost@4] - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/partsupp.parquet:0..41092331]]}, projection=[ps_suppkey, ps_availqty, ps_supplycost] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet:0..41092331]]}, projection=[ps_suppkey, ps_availqty, ps_supplycost] AggregateExec: mode=Single, gby=[ps_partkey@0 
as ps_partkey], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@3)], projection=[ps_partkey@1, ps_availqty@2, ps_supplycost@3] ProjectionExec: expr=[n_nationkey@0 as n_nationkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: n_name@1 = ALGERIA - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ALGERIA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ALGERIA AND ALGERIA <= n_name_max@1 END, required_guarantees=[n_name in (ALGERIA)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ALGERIA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ALGERIA AND ALGERIA <= n_name_max@1 END, required_guarantees=[n_name in (ALGERIA)] ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_availqty@2 as ps_availqty, ps_supplycost@3 as ps_supplycost, s_nationkey@0 as s_nationkey] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@1)], projection=[s_nationkey@1, ps_partkey@2, ps_availqty@4, ps_supplycost@5] - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/partsupp.parquet:0..41092331]]}, projection=[ps_partkey, ps_suppkey, ps_availqty, ps_supplycost] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] + ParquetExec: file_groups={1 group: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet:0..41092331]]}, projection=[ps_partkey, ps_suppkey, ps_availqty, ps_supplycost] RaySQL Plan =========== @@ -73,22 +73,22 @@ SortExec: expr=[value@1 DESC], preserve_partitioning=[false] ProjectionExec: expr=[n_nationkey@0 as n_nationkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: n_name@1 = ALGERIA - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ALGERIA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ALGERIA AND ALGERIA <= n_name_max@1 END, required_guarantees=[n_name in (ALGERIA)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ALGERIA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ALGERIA AND ALGERIA <= n_name_max@1 END, required_guarantees=[n_name in (ALGERIA)] ProjectionExec: expr=[ps_availqty@1 as ps_availqty, ps_supplycost@2 as ps_supplycost, s_nationkey@0 as s_nationkey] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@0)], projection=[s_nationkey@1, ps_availqty@3, ps_supplycost@4] - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/partsupp.parquet:0..41092331]]}, projection=[ps_suppkey, ps_availqty, ps_supplycost] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet:0..41092331]]}, projection=[ps_suppkey, 
ps_availqty, ps_supplycost] AggregateExec: mode=Single, gby=[ps_partkey@0 as ps_partkey], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@3)], projection=[ps_partkey@1, ps_availqty@2, ps_supplycost@3] ProjectionExec: expr=[n_nationkey@0 as n_nationkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: n_name@1 = ALGERIA - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ALGERIA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ALGERIA AND ALGERIA <= n_name_max@1 END, required_guarantees=[n_name in (ALGERIA)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ALGERIA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ALGERIA AND ALGERIA <= n_name_max@1 END, required_guarantees=[n_name in (ALGERIA)] ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_availqty@2 as ps_availqty, ps_supplycost@3 as ps_supplycost, s_nationkey@0 as s_nationkey] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@1)], projection=[s_nationkey@1, ps_partkey@2, ps_availqty@4, ps_supplycost@5] - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/partsupp.parquet:0..41092331]]}, projection=[ps_partkey, ps_suppkey, ps_availqty, ps_supplycost] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] + 
ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet:0..41092331]]}, projection=[ps_partkey, ps_suppkey, ps_availqty, ps_supplycost] diff --git a/testdata/expected-plans/q12.txt b/testdata/expected-plans/q12.txt index 563495c..e940bd5 100644 --- a/testdata/expected-plans/q12.txt +++ b/testdata/expected-plans/q12.txt @@ -23,8 +23,8 @@ SortExec: expr=[l_shipmode@0 ASC NULLS LAST], preserve_partitioning=[false] ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_shipmode@4 as l_shipmode] CoalesceBatchesExec: target_batch_size=8192 FilterExec: (l_shipmode@4 = FOB OR l_shipmode@4 = SHIP) AND l_receiptdate@3 > l_commitdate@2 AND l_shipdate@1 < l_commitdate@2 AND l_receiptdate@3 >= 1995-01-01 AND l_receiptdate@3 < 1996-01-01 - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_shipdate, l_commitdate, l_receiptdate, l_shipmode], predicate=(l_shipmode@14 = FOB OR l_shipmode@14 = SHIP) AND l_receiptdate@12 > l_commitdate@11 AND l_shipdate@10 < l_commitdate@11 AND l_receiptdate@12 >= 1995-01-01 AND l_receiptdate@12 < 1996-01-01, pruning_predicate=(CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= FOB AND FOB <= l_shipmode_max@1 END OR CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= SHIP AND SHIP <= l_shipmode_max@1 END) AND CASE WHEN l_receiptdate_null_count@5 = l_receiptdate_row_count@6 THEN false ELSE l_receiptdate_max@4 >= 1995-01-01 END AND CASE WHEN l_receiptdate_null_count@5 = l_receiptdate_row_count@6 THEN false ELSE l_receiptdate_min@7 < 1996-01-01 END, required_guarantees=[l_shipmode in (SHIP, FOB)] - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_orderpriority] + ParquetExec: file_groups={1 group: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_shipdate, l_commitdate, l_receiptdate, l_shipmode], predicate=(l_shipmode@14 = FOB OR l_shipmode@14 = SHIP) AND l_receiptdate@12 > l_commitdate@11 AND l_shipdate@10 < l_commitdate@11 AND l_receiptdate@12 >= 1995-01-01 AND l_receiptdate@12 < 1996-01-01, pruning_predicate=(CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= FOB AND FOB <= l_shipmode_max@1 END OR CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= SHIP AND SHIP <= l_shipmode_max@1 END) AND CASE WHEN l_receiptdate_null_count@5 = l_receiptdate_row_count@6 THEN false ELSE l_receiptdate_max@4 >= 1995-01-01 END AND CASE WHEN l_receiptdate_null_count@5 = l_receiptdate_row_count@6 THEN false ELSE l_receiptdate_min@7 < 1996-01-01 END, required_guarantees=[l_shipmode in (SHIP, FOB)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_orderpriority] RaySQL Plan =========== @@ -39,6 +39,6 @@ SortExec: expr=[l_shipmode@0 ASC NULLS LAST], preserve_partitioning=[false] ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_shipmode@4 as l_shipmode] CoalesceBatchesExec: target_batch_size=8192 FilterExec: (l_shipmode@4 = FOB OR l_shipmode@4 = SHIP) AND l_receiptdate@3 > l_commitdate@2 AND l_shipdate@1 < l_commitdate@2 AND l_receiptdate@3 >= 1995-01-01 AND l_receiptdate@3 < 1996-01-01 - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_shipdate, l_commitdate, l_receiptdate, l_shipmode], predicate=(l_shipmode@14 = FOB OR l_shipmode@14 = SHIP) AND l_receiptdate@12 > l_commitdate@11 AND l_shipdate@10 < l_commitdate@11 AND l_receiptdate@12 >= 1995-01-01 AND l_receiptdate@12 < 1996-01-01, pruning_predicate=(CASE WHEN l_shipmode_null_count@2 = 
l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= FOB AND FOB <= l_shipmode_max@1 END OR CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= SHIP AND SHIP <= l_shipmode_max@1 END) AND CASE WHEN l_receiptdate_null_count@5 = l_receiptdate_row_count@6 THEN false ELSE l_receiptdate_max@4 >= 1995-01-01 END AND CASE WHEN l_receiptdate_null_count@5 = l_receiptdate_row_count@6 THEN false ELSE l_receiptdate_min@7 < 1996-01-01 END, required_guarantees=[l_shipmode in (SHIP, FOB)] - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_orderpriority] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_shipdate, l_commitdate, l_receiptdate, l_shipmode], predicate=(l_shipmode@14 = FOB OR l_shipmode@14 = SHIP) AND l_receiptdate@12 > l_commitdate@11 AND l_shipdate@10 < l_commitdate@11 AND l_receiptdate@12 >= 1995-01-01 AND l_receiptdate@12 < 1996-01-01, pruning_predicate=(CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= FOB AND FOB <= l_shipmode_max@1 END OR CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= SHIP AND SHIP <= l_shipmode_max@1 END) AND CASE WHEN l_receiptdate_null_count@5 = l_receiptdate_row_count@6 THEN false ELSE l_receiptdate_max@4 >= 1995-01-01 END AND CASE WHEN l_receiptdate_null_count@5 = l_receiptdate_row_count@6 THEN false ELSE l_receiptdate_min@7 < 1996-01-01 END, required_guarantees=[l_shipmode in (SHIP, FOB)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_orderpriority] diff --git a/testdata/expected-plans/q13.txt b/testdata/expected-plans/q13.txt index 446b9b5..4dc1238 100644 --- a/testdata/expected-plans/q13.txt +++ 
b/testdata/expected-plans/q13.txt @@ -24,11 +24,11 @@ SortExec: expr=[custdist@1 DESC,c_count@0 DESC], preserve_partitioning=[false] AggregateExec: mode=Single, gby=[c_custkey@0 as c_custkey], aggr=[count(orders.o_orderkey)] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Left, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, o_orderkey@1] - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/customer.parquet:0..13212491]]}, projection=[c_custkey] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13212491]]}, projection=[c_custkey] ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: o_comment@2 NOT LIKE %express%requests% - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_custkey, o_comment], predicate=o_comment@8 NOT LIKE %express%requests% + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_custkey, o_comment], predicate=o_comment@8 NOT LIKE %express%requests% RaySQL Plan =========== @@ -41,9 +41,9 @@ SortExec: expr=[custdist@1 DESC,c_count@0 DESC], preserve_partitioning=[false] AggregateExec: mode=Single, gby=[c_custkey@0 as c_custkey], aggr=[count(orders.o_orderkey)] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Left, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, o_orderkey@1] - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/customer.parquet:0..13212491]]}, projection=[c_custkey] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13212491]]}, projection=[c_custkey] ProjectionExec: expr=[o_orderkey@0 as o_orderkey, 
o_custkey@1 as o_custkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: o_comment@2 NOT LIKE %express%requests% - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_custkey, o_comment], predicate=o_comment@8 NOT LIKE %express%requests% + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_custkey, o_comment], predicate=o_comment@8 NOT LIKE %express%requests% diff --git a/testdata/expected-plans/q14.txt b/testdata/expected-plans/q14.txt index b1f39b6..3c70553 100644 --- a/testdata/expected-plans/q14.txt +++ b/testdata/expected-plans/q14.txt @@ -18,11 +18,11 @@ ProjectionExec: expr=[100 * CAST(sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") T ProjectionExec: expr=[l_extendedprice@1 * (Some(1),20,0 - l_discount@2) as __common_expr_2, p_type@0 as p_type] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, l_partkey@0)], projection=[p_type@1, l_extendedprice@3, l_discount@4] - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_type] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_type] ProjectionExec: expr=[l_partkey@0 as l_partkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_shipdate@3 >= 1995-02-01 AND l_shipdate@3 < 1995-03-01 - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_partkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1995-02-01 AND l_shipdate@10 < 1995-03-01, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 
1995-02-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1995-03-01 END, required_guarantees=[] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_partkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1995-02-01 AND l_shipdate@10 < 1995-03-01, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1995-02-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1995-03-01 END, required_guarantees=[] RaySQL Plan =========== @@ -33,9 +33,9 @@ ProjectionExec: expr=[100 * CAST(sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") T ProjectionExec: expr=[l_extendedprice@1 * (Some(1),20,0 - l_discount@2) as __common_expr_2, p_type@0 as p_type] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, l_partkey@0)], projection=[p_type@1, l_extendedprice@3, l_discount@4] - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_type] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_type] ProjectionExec: expr=[l_partkey@0 as l_partkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_shipdate@3 >= 1995-02-01 AND l_shipdate@3 < 1995-03-01 - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_partkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1995-02-01 AND l_shipdate@10 < 1995-03-01, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1995-02-01 END AND CASE WHEN 
l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1995-03-01 END, required_guarantees=[] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_partkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1995-02-01 AND l_shipdate@10 < 1995-03-01, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1995-02-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1995-03-01 END, required_guarantees=[] diff --git a/testdata/expected-plans/q16.txt b/testdata/expected-plans/q16.txt index e5aeda9..6713d19 100644 --- a/testdata/expected-plans/q16.txt +++ b/testdata/expected-plans/q16.txt @@ -28,14 +28,14 @@ SortExec: expr=[supplier_cnt@3 DESC,p_brand@0 ASC NULLS LAST,p_type@1 ASC NULLS ProjectionExec: expr=[s_suppkey@0 as s_suppkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: s_comment@1 LIKE %Customer%Complaints% - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_comment], predicate=s_comment@6 LIKE %Customer%Complaints% + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_comment], predicate=s_comment@6 LIKE %Customer%Complaints% ProjectionExec: expr=[ps_suppkey@3 as ps_suppkey, p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, ps_partkey@0)], projection=[p_brand@1, p_type@2, p_size@3, ps_suppkey@5] CoalesceBatchesExec: target_batch_size=8192 FilterExec: p_brand@1 != Brand#14 AND p_type@2 NOT LIKE SMALL PLATED% AND Use p_size@3 IN (SET) ([Literal { value: Int32(14) }, Literal { value: Int32(6) }, Literal { value: Int32(5) }, Literal { 
value: Int32(31) }, Literal { value: Int32(49) }, Literal { value: Int32(15) }, Literal { value: Int32(41) }, Literal { value: Int32(47) }]) - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_brand, p_type, p_size], predicate=p_brand@3 != Brand#14 AND p_type@4 NOT LIKE SMALL PLATED% AND Use p_size@5 IN (SET) ([Literal { value: Int32(14) }, Literal { value: Int32(6) }, Literal { value: Int32(5) }, Literal { value: Int32(31) }, Literal { value: Int32(49) }, Literal { value: Int32(15) }, Literal { value: Int32(41) }, Literal { value: Int32(47) }]), pruning_predicate=CASE WHEN p_brand_null_count@2 = p_brand_row_count@3 THEN false ELSE p_brand_min@0 != Brand#14 OR Brand#14 != p_brand_max@1 END AND (CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 14 AND 14 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 6 AND 6 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 5 AND 5 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 31 AND 31 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 49 AND 49 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 15 AND 15 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 41 AND 41 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 47 AND 47 <= p_size_max@5 END), required_guarantees=[p_brand not in (Brand#14), p_size in (6, 31, 41, 15, 49, 5, 14, 47)] - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/partsupp.parquet:0..41092331]]}, projection=[ps_partkey, ps_suppkey] + ParquetExec: file_groups={1 group: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_brand, p_type, p_size], predicate=p_brand@3 != Brand#14 AND p_type@4 NOT LIKE SMALL PLATED% AND Use p_size@5 IN (SET) ([Literal { value: Int32(14) }, Literal { value: Int32(6) }, Literal { value: Int32(5) }, Literal { value: Int32(31) }, Literal { value: Int32(49) }, Literal { value: Int32(15) }, Literal { value: Int32(41) }, Literal { value: Int32(47) }]), pruning_predicate=CASE WHEN p_brand_null_count@2 = p_brand_row_count@3 THEN false ELSE p_brand_min@0 != Brand#14 OR Brand#14 != p_brand_max@1 END AND (CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 14 AND 14 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 6 AND 6 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 5 AND 5 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 31 AND 31 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 49 AND 49 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 15 AND 15 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 41 AND 41 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 47 AND 47 <= p_size_max@5 END), required_guarantees=[p_brand not in (Brand#14), p_size in (6, 31, 41, 15, 49, 5, 14, 47)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet:0..41092331]]}, projection=[ps_partkey, ps_suppkey] RaySQL Plan =========== @@ -50,12 +50,12 @@ SortExec: expr=[supplier_cnt@3 DESC,p_brand@0 ASC NULLS LAST,p_type@1 ASC NULLS ProjectionExec: expr=[s_suppkey@0 as s_suppkey] CoalesceBatchesExec: target_batch_size=8192 
FilterExec: s_comment@1 LIKE %Customer%Complaints% - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_comment], predicate=s_comment@6 LIKE %Customer%Complaints% + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_comment], predicate=s_comment@6 LIKE %Customer%Complaints% ProjectionExec: expr=[ps_suppkey@3 as ps_suppkey, p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, ps_partkey@0)], projection=[p_brand@1, p_type@2, p_size@3, ps_suppkey@5] CoalesceBatchesExec: target_batch_size=8192 FilterExec: p_brand@1 != Brand#14 AND p_type@2 NOT LIKE SMALL PLATED% AND Use p_size@3 IN (SET) ([Literal { value: Int32(14) }, Literal { value: Int32(6) }, Literal { value: Int32(5) }, Literal { value: Int32(31) }, Literal { value: Int32(49) }, Literal { value: Int32(15) }, Literal { value: Int32(41) }, Literal { value: Int32(47) }]) - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_brand, p_type, p_size], predicate=p_brand@3 != Brand#14 AND p_type@4 NOT LIKE SMALL PLATED% AND Use p_size@5 IN (SET) ([Literal { value: Int32(14) }, Literal { value: Int32(6) }, Literal { value: Int32(5) }, Literal { value: Int32(31) }, Literal { value: Int32(49) }, Literal { value: Int32(15) }, Literal { value: Int32(41) }, Literal { value: Int32(47) }]), pruning_predicate=CASE WHEN p_brand_null_count@2 = p_brand_row_count@3 THEN false ELSE p_brand_min@0 != Brand#14 OR Brand#14 != p_brand_max@1 END AND (CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 14 AND 14 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 6 AND 6 <= p_size_max@5 END OR CASE WHEN 
p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 5 AND 5 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 31 AND 31 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 49 AND 49 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 15 AND 15 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 41 AND 41 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 47 AND 47 <= p_size_max@5 END), required_guarantees=[p_brand not in (Brand#14), p_size in (6, 31, 41, 15, 49, 5, 14, 47)] - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/partsupp.parquet:0..41092331]]}, projection=[ps_partkey, ps_suppkey] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_brand, p_type, p_size], predicate=p_brand@3 != Brand#14 AND p_type@4 NOT LIKE SMALL PLATED% AND Use p_size@5 IN (SET) ([Literal { value: Int32(14) }, Literal { value: Int32(6) }, Literal { value: Int32(5) }, Literal { value: Int32(31) }, Literal { value: Int32(49) }, Literal { value: Int32(15) }, Literal { value: Int32(41) }, Literal { value: Int32(47) }]), pruning_predicate=CASE WHEN p_brand_null_count@2 = p_brand_row_count@3 THEN false ELSE p_brand_min@0 != Brand#14 OR Brand#14 != p_brand_max@1 END AND (CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 14 AND 14 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 6 AND 6 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 5 AND 5 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 31 
AND 31 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 49 AND 49 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 15 AND 15 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 41 AND 41 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 47 AND 47 <= p_size_max@5 END), required_guarantees=[p_brand not in (Brand#14), p_size in (6, 31, 41, 15, 49, 5, 14, 47)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet:0..41092331]]}, projection=[ps_partkey, ps_suppkey] diff --git a/testdata/expected-plans/q17.txt b/testdata/expected-plans/q17.txt index 7a03af1..251bc45 100644 --- a/testdata/expected-plans/q17.txt +++ b/testdata/expected-plans/q17.txt @@ -29,11 +29,11 @@ ProjectionExec: expr=[CAST(sum(lineitem.l_extendedprice)@0 AS Float64) / 7 as av ProjectionExec: expr=[p_partkey@0 as p_partkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: p_brand@1 = Brand#42 AND p_container@2 = LG BAG - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_brand, p_container], predicate=p_brand@3 = Brand#42 AND p_container@6 = LG BAG, pruning_predicate=CASE WHEN p_brand_null_count@2 = p_brand_row_count@3 THEN false ELSE p_brand_min@0 <= Brand#42 AND Brand#42 <= p_brand_max@1 END AND CASE WHEN p_container_null_count@6 = p_container_row_count@7 THEN false ELSE p_container_min@4 <= LG BAG AND LG BAG <= p_container_max@5 END, required_guarantees=[p_brand in (Brand#42), p_container in (LG BAG)] - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_partkey, l_quantity, l_extendedprice] + ParquetExec: file_groups={1 group: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_brand, p_container], predicate=p_brand@3 = Brand#42 AND p_container@6 = LG BAG, pruning_predicate=CASE WHEN p_brand_null_count@2 = p_brand_row_count@3 THEN false ELSE p_brand_min@0 <= Brand#42 AND Brand#42 <= p_brand_max@1 END AND CASE WHEN p_container_null_count@6 = p_container_row_count@7 THEN false ELSE p_container_min@4 <= LG BAG AND LG BAG <= p_container_max@5 END, required_guarantees=[p_brand in (Brand#42), p_container in (LG BAG)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_partkey, l_quantity, l_extendedprice] ProjectionExec: expr=[CAST(0.2 * CAST(avg(lineitem.l_quantity)@1 AS Float64) AS Decimal128(30, 15)) as Float64(0.2) * avg(lineitem.l_quantity), l_partkey@0 as l_partkey] AggregateExec: mode=Single, gby=[l_partkey@0 as l_partkey], aggr=[avg(lineitem.l_quantity)] - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_partkey, l_quantity] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_partkey, l_quantity] RaySQL Plan =========== @@ -49,9 +49,9 @@ ProjectionExec: expr=[CAST(sum(lineitem.l_extendedprice)@0 AS Float64) / 7 as av ProjectionExec: expr=[p_partkey@0 as p_partkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: p_brand@1 = Brand#42 AND p_container@2 = LG BAG - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_brand, p_container], predicate=p_brand@3 = Brand#42 AND p_container@6 = LG BAG, pruning_predicate=CASE WHEN p_brand_null_count@2 = p_brand_row_count@3 THEN false ELSE p_brand_min@0 <= Brand#42 AND Brand#42 <= p_brand_max@1 END AND CASE WHEN p_container_null_count@6 = p_container_row_count@7 THEN false ELSE 
p_container_min@4 <= LG BAG AND LG BAG <= p_container_max@5 END, required_guarantees=[p_brand in (Brand#42), p_container in (LG BAG)] - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_partkey, l_quantity, l_extendedprice] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_brand, p_container], predicate=p_brand@3 = Brand#42 AND p_container@6 = LG BAG, pruning_predicate=CASE WHEN p_brand_null_count@2 = p_brand_row_count@3 THEN false ELSE p_brand_min@0 <= Brand#42 AND Brand#42 <= p_brand_max@1 END AND CASE WHEN p_container_null_count@6 = p_container_row_count@7 THEN false ELSE p_container_min@4 <= LG BAG AND LG BAG <= p_container_max@5 END, required_guarantees=[p_brand in (Brand#42), p_container in (LG BAG)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_partkey, l_quantity, l_extendedprice] ProjectionExec: expr=[CAST(0.2 * CAST(avg(lineitem.l_quantity)@1 AS Float64) AS Decimal128(30, 15)) as Float64(0.2) * avg(lineitem.l_quantity), l_partkey@0 as l_partkey] AggregateExec: mode=Single, gby=[l_partkey@0 as l_partkey], aggr=[avg(lineitem.l_quantity)] - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_partkey, l_quantity] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_partkey, l_quantity] diff --git a/testdata/expected-plans/q18.txt b/testdata/expected-plans/q18.txt index 10e413b..659fb02 100644 --- a/testdata/expected-plans/q18.txt +++ b/testdata/expected-plans/q18.txt @@ -29,14 +29,14 @@ SortExec: TopK(fetch=100), expr=[o_totalprice@4 DESC,o_orderdate@3 ASC NULLS LAS CoalesceBatchesExec: target_batch_size=8192 FilterExec: sum(lineitem.l_quantity)@1 
> Some(31300),21,2 AggregateExec: mode=Single, gby=[l_orderkey@0 as l_orderkey], aggr=[sum(lineitem.l_quantity)] - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_quantity] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_quantity] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(o_orderkey@2, l_orderkey@0)], projection=[c_custkey@0, c_name@1, o_orderkey@2, o_totalprice@3, o_orderdate@4, l_quantity@6] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, c_name@1, o_orderkey@2, o_totalprice@4, o_orderdate@5] - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/customer.parquet:0..13212491]]}, projection=[c_custkey, c_name] - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_custkey, o_totalprice, o_orderdate] - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_quantity] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13212491]]}, projection=[c_custkey, c_name] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_custkey, o_totalprice, o_orderdate] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_quantity] RaySQL Plan =========== @@ -50,12 +50,12 @@ SortExec: TopK(fetch=100), expr=[o_totalprice@4 DESC,o_orderdate@3 ASC NULLS LAS CoalesceBatchesExec: 
target_batch_size=8192 FilterExec: sum(lineitem.l_quantity)@1 > Some(31300),21,2 AggregateExec: mode=Single, gby=[l_orderkey@0 as l_orderkey], aggr=[sum(lineitem.l_quantity)] - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_quantity] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_quantity] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(o_orderkey@2, l_orderkey@0)], projection=[c_custkey@0, c_name@1, o_orderkey@2, o_totalprice@3, o_orderdate@4, l_quantity@6] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, c_name@1, o_orderkey@2, o_totalprice@4, o_orderdate@5] - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/customer.parquet:0..13212491]]}, projection=[c_custkey, c_name] - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_custkey, o_totalprice, o_orderdate] - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_quantity] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13212491]]}, projection=[c_custkey, c_name] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_custkey, o_totalprice, o_orderdate] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_quantity] diff --git a/testdata/expected-plans/q19.txt b/testdata/expected-plans/q19.txt index a36fe83..748af1f 
100644 --- a/testdata/expected-plans/q19.txt +++ b/testdata/expected-plans/q19.txt @@ -20,11 +20,11 @@ ProjectionExec: expr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_disco HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, l_partkey@0)], filter=p_brand@1 = Brand#21 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("SM CASE") }, Literal { value: Utf8("SM BOX") }, Literal { value: Utf8("SM PACK") }, Literal { value: Utf8("SM PKG") }]) AND l_quantity@0 >= Some(800),11,2 AND l_quantity@0 <= Some(1800),11,2 AND p_size@2 <= 5 OR p_brand@1 = Brand#13 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("MED BAG") }, Literal { value: Utf8("MED BOX") }, Literal { value: Utf8("MED PKG") }, Literal { value: Utf8("MED PACK") }]) AND l_quantity@0 >= Some(2000),11,2 AND l_quantity@0 <= Some(3000),11,2 AND p_size@2 <= 10 OR p_brand@1 = Brand#52 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("LG CASE") }, Literal { value: Utf8("LG BOX") }, Literal { value: Utf8("LG PACK") }, Literal { value: Utf8("LG PKG") }]) AND l_quantity@0 >= Some(3000),11,2 AND l_quantity@0 <= Some(4000),11,2 AND p_size@2 <= 15, projection=[l_extendedprice@6, l_discount@7] CoalesceBatchesExec: target_batch_size=8192 FilterExec: (p_brand@1 = Brand#21 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("SM CASE") }, Literal { value: Utf8("SM BOX") }, Literal { value: Utf8("SM PACK") }, Literal { value: Utf8("SM PKG") }]) AND p_size@2 <= 5 OR p_brand@1 = Brand#13 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("MED BAG") }, Literal { value: Utf8("MED BOX") }, Literal { value: Utf8("MED PKG") }, Literal { value: Utf8("MED PACK") }]) AND p_size@2 <= 10 OR p_brand@1 = Brand#52 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("LG CASE") }, Literal { value: Utf8("LG BOX") }, Literal { value: Utf8("LG PACK") }, Literal { value: Utf8("LG PKG") }]) AND p_size@2 <= 15) AND p_size@2 >= 1 - ParquetExec: file_groups={1 group: 
[[home/edmondo/Development/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_brand, p_size, p_container], predicate=p_size@5 >= 1 AND (p_brand@3 = Brand#21 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("SM CASE") }, Literal { value: Utf8("SM BOX") }, Literal { value: Utf8("SM PACK") }, Literal { value: Utf8("SM PKG") }]) AND p_size@5 <= 5 OR p_brand@3 = Brand#13 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("MED BAG") }, Literal { value: Utf8("MED BOX") }, Literal { value: Utf8("MED PKG") }, Literal { value: Utf8("MED PACK") }]) AND p_size@5 <= 10 OR p_brand@3 = Brand#52 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("LG CASE") }, Literal { value: Utf8("LG BOX") }, Literal { value: Utf8("LG PACK") }, Literal { value: Utf8("LG PKG") }]) AND p_size@5 <= 15), pruning_predicate=CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_max@0 >= 1 END AND (CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#21 AND Brand#21 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM CASE AND SM CASE <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM BOX AND SM BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM PACK AND SM PACK <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM PKG AND SM PKG <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 5 END OR CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#13 AND Brand#13 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED BAG AND MED BAG <= 
p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED BOX AND MED BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED PKG AND MED PKG <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED PACK AND MED PACK <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 10 END OR CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#52 AND Brand#52 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG CASE AND LG CASE <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG BOX AND LG BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG PACK AND LG PACK <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG PKG AND LG PKG <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 15 END), required_guarantees=[] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_brand, p_size, p_container], predicate=p_size@5 >= 1 AND (p_brand@3 = Brand#21 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("SM CASE") }, Literal { value: Utf8("SM BOX") }, Literal { value: Utf8("SM PACK") }, Literal { value: Utf8("SM PKG") }]) AND p_size@5 <= 5 OR p_brand@3 = Brand#13 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("MED BAG") }, Literal { value: Utf8("MED BOX") }, Literal { value: Utf8("MED PKG") }, Literal { value: Utf8("MED 
PACK") }]) AND p_size@5 <= 10 OR p_brand@3 = Brand#52 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("LG CASE") }, Literal { value: Utf8("LG BOX") }, Literal { value: Utf8("LG PACK") }, Literal { value: Utf8("LG PKG") }]) AND p_size@5 <= 15), pruning_predicate=CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_max@0 >= 1 END AND (CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#21 AND Brand#21 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM CASE AND SM CASE <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM BOX AND SM BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM PACK AND SM PACK <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM PKG AND SM PKG <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 5 END OR CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#13 AND Brand#13 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED BAG AND MED BAG <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED BOX AND MED BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED PKG AND MED PKG <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED PACK AND MED PACK <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 10 END OR 
CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#52 AND Brand#52 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG CASE AND LG CASE <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG BOX AND LG BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG PACK AND LG PACK <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG PKG AND LG PKG <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 15 END), required_guarantees=[] ProjectionExec: expr=[l_partkey@0 as l_partkey, l_quantity@1 as l_quantity, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount] CoalesceBatchesExec: target_batch_size=8192 FilterExec: (l_quantity@1 >= Some(800),11,2 AND l_quantity@1 <= Some(1800),11,2 OR l_quantity@1 >= Some(2000),11,2 AND l_quantity@1 <= Some(3000),11,2 OR l_quantity@1 >= Some(3000),11,2 AND l_quantity@1 <= Some(4000),11,2) AND (l_shipmode@5 = AIR OR l_shipmode@5 = AIR REG) AND l_shipinstruct@4 = DELIVER IN PERSON - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_partkey, l_quantity, l_extendedprice, l_discount, l_shipinstruct, l_shipmode], predicate=(l_shipmode@14 = AIR OR l_shipmode@14 = AIR REG) AND l_shipinstruct@13 = DELIVER IN PERSON AND (l_quantity@4 >= Some(800),11,2 AND l_quantity@4 <= Some(1800),11,2 OR l_quantity@4 >= Some(2000),11,2 AND l_quantity@4 <= Some(3000),11,2 OR l_quantity@4 >= Some(3000),11,2 AND l_quantity@4 <= Some(4000),11,2), pruning_predicate=(CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= AIR AND AIR <= 
l_shipmode_max@1 END OR CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= AIR REG AND AIR REG <= l_shipmode_max@1 END) AND CASE WHEN l_shipinstruct_null_count@6 = l_shipinstruct_row_count@7 THEN false ELSE l_shipinstruct_min@4 <= DELIVER IN PERSON AND DELIVER IN PERSON <= l_shipinstruct_max@5 END AND (CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(800),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(1800),11,2 END OR CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(2000),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(3000),11,2 END OR CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(3000),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(4000),11,2 END), required_guarantees=[l_shipmode in (AIR REG, AIR), l_shipinstruct in (DELIVER IN PERSON)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_partkey, l_quantity, l_extendedprice, l_discount, l_shipinstruct, l_shipmode], predicate=(l_shipmode@14 = AIR OR l_shipmode@14 = AIR REG) AND l_shipinstruct@13 = DELIVER IN PERSON AND (l_quantity@4 >= Some(800),11,2 AND l_quantity@4 <= Some(1800),11,2 OR l_quantity@4 >= Some(2000),11,2 AND l_quantity@4 <= Some(3000),11,2 OR l_quantity@4 >= Some(3000),11,2 AND l_quantity@4 <= Some(4000),11,2), pruning_predicate=(CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= AIR AND AIR <= l_shipmode_max@1 END OR CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= AIR REG AND AIR REG <= l_shipmode_max@1 END) AND CASE WHEN 
l_shipinstruct_null_count@6 = l_shipinstruct_row_count@7 THEN false ELSE l_shipinstruct_min@4 <= DELIVER IN PERSON AND DELIVER IN PERSON <= l_shipinstruct_max@5 END AND (CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(800),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(1800),11,2 END OR CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(2000),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(3000),11,2 END OR CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(3000),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(4000),11,2 END), required_guarantees=[l_shipmode in (AIR REG, AIR), l_shipinstruct in (DELIVER IN PERSON)] RaySQL Plan =========== @@ -36,9 +36,9 @@ ProjectionExec: expr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_disco HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, l_partkey@0)], filter=p_brand@1 = Brand#21 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("SM CASE") }, Literal { value: Utf8("SM BOX") }, Literal { value: Utf8("SM PACK") }, Literal { value: Utf8("SM PKG") }]) AND l_quantity@0 >= Some(800),11,2 AND l_quantity@0 <= Some(1800),11,2 AND p_size@2 <= 5 OR p_brand@1 = Brand#13 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("MED BAG") }, Literal { value: Utf8("MED BOX") }, Literal { value: Utf8("MED PKG") }, Literal { value: Utf8("MED PACK") }]) AND l_quantity@0 >= Some(2000),11,2 AND l_quantity@0 <= Some(3000),11,2 AND p_size@2 <= 10 OR p_brand@1 = Brand#52 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("LG CASE") }, Literal { value: Utf8("LG BOX") }, Literal { value: Utf8("LG PACK") }, Literal { value: Utf8("LG PKG") }]) AND l_quantity@0 >= 
Some(3000),11,2 AND l_quantity@0 <= Some(4000),11,2 AND p_size@2 <= 15, projection=[l_extendedprice@6, l_discount@7] CoalesceBatchesExec: target_batch_size=8192 FilterExec: (p_brand@1 = Brand#21 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("SM CASE") }, Literal { value: Utf8("SM BOX") }, Literal { value: Utf8("SM PACK") }, Literal { value: Utf8("SM PKG") }]) AND p_size@2 <= 5 OR p_brand@1 = Brand#13 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("MED BAG") }, Literal { value: Utf8("MED BOX") }, Literal { value: Utf8("MED PKG") }, Literal { value: Utf8("MED PACK") }]) AND p_size@2 <= 10 OR p_brand@1 = Brand#52 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("LG CASE") }, Literal { value: Utf8("LG BOX") }, Literal { value: Utf8("LG PACK") }, Literal { value: Utf8("LG PKG") }]) AND p_size@2 <= 15) AND p_size@2 >= 1 - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_brand, p_size, p_container], predicate=p_size@5 >= 1 AND (p_brand@3 = Brand#21 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("SM CASE") }, Literal { value: Utf8("SM BOX") }, Literal { value: Utf8("SM PACK") }, Literal { value: Utf8("SM PKG") }]) AND p_size@5 <= 5 OR p_brand@3 = Brand#13 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("MED BAG") }, Literal { value: Utf8("MED BOX") }, Literal { value: Utf8("MED PKG") }, Literal { value: Utf8("MED PACK") }]) AND p_size@5 <= 10 OR p_brand@3 = Brand#52 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("LG CASE") }, Literal { value: Utf8("LG BOX") }, Literal { value: Utf8("LG PACK") }, Literal { value: Utf8("LG PKG") }]) AND p_size@5 <= 15), pruning_predicate=CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_max@0 >= 1 END AND (CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#21 AND Brand#21 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 
THEN false ELSE p_container_min@7 <= SM CASE AND SM CASE <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM BOX AND SM BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM PACK AND SM PACK <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM PKG AND SM PKG <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 5 END OR CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#13 AND Brand#13 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED BAG AND MED BAG <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED BOX AND MED BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED PKG AND MED PKG <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED PACK AND MED PACK <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 10 END OR CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#52 AND Brand#52 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG CASE AND LG CASE <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG BOX AND LG BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG PACK AND LG PACK <= 
p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG PKG AND LG PKG <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 15 END), required_guarantees=[] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_brand, p_size, p_container], predicate=p_size@5 >= 1 AND (p_brand@3 = Brand#21 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("SM CASE") }, Literal { value: Utf8("SM BOX") }, Literal { value: Utf8("SM PACK") }, Literal { value: Utf8("SM PKG") }]) AND p_size@5 <= 5 OR p_brand@3 = Brand#13 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("MED BAG") }, Literal { value: Utf8("MED BOX") }, Literal { value: Utf8("MED PKG") }, Literal { value: Utf8("MED PACK") }]) AND p_size@5 <= 10 OR p_brand@3 = Brand#52 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("LG CASE") }, Literal { value: Utf8("LG BOX") }, Literal { value: Utf8("LG PACK") }, Literal { value: Utf8("LG PKG") }]) AND p_size@5 <= 15), pruning_predicate=CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_max@0 >= 1 END AND (CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#21 AND Brand#21 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM CASE AND SM CASE <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM BOX AND SM BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM PACK AND SM PACK <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM PKG AND SM PKG <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 
= p_size_row_count@2 THEN false ELSE p_size_min@11 <= 5 END OR CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#13 AND Brand#13 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED BAG AND MED BAG <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED BOX AND MED BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED PKG AND MED PKG <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED PACK AND MED PACK <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 10 END OR CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#52 AND Brand#52 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG CASE AND LG CASE <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG BOX AND LG BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG PACK AND LG PACK <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG PKG AND LG PKG <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 15 END), required_guarantees=[] ProjectionExec: expr=[l_partkey@0 as l_partkey, l_quantity@1 as l_quantity, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount] CoalesceBatchesExec: target_batch_size=8192 FilterExec: (l_quantity@1 >= Some(800),11,2 AND l_quantity@1 <= Some(1800),11,2 OR 
l_quantity@1 >= Some(2000),11,2 AND l_quantity@1 <= Some(3000),11,2 OR l_quantity@1 >= Some(3000),11,2 AND l_quantity@1 <= Some(4000),11,2) AND (l_shipmode@5 = AIR OR l_shipmode@5 = AIR REG) AND l_shipinstruct@4 = DELIVER IN PERSON - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_partkey, l_quantity, l_extendedprice, l_discount, l_shipinstruct, l_shipmode], predicate=(l_shipmode@14 = AIR OR l_shipmode@14 = AIR REG) AND l_shipinstruct@13 = DELIVER IN PERSON AND (l_quantity@4 >= Some(800),11,2 AND l_quantity@4 <= Some(1800),11,2 OR l_quantity@4 >= Some(2000),11,2 AND l_quantity@4 <= Some(3000),11,2 OR l_quantity@4 >= Some(3000),11,2 AND l_quantity@4 <= Some(4000),11,2), pruning_predicate=(CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= AIR AND AIR <= l_shipmode_max@1 END OR CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= AIR REG AND AIR REG <= l_shipmode_max@1 END) AND CASE WHEN l_shipinstruct_null_count@6 = l_shipinstruct_row_count@7 THEN false ELSE l_shipinstruct_min@4 <= DELIVER IN PERSON AND DELIVER IN PERSON <= l_shipinstruct_max@5 END AND (CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(800),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(1800),11,2 END OR CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(2000),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(3000),11,2 END OR CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(3000),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(4000),11,2 END), required_guarantees=[l_shipmode in (AIR REG, 
AIR), l_shipinstruct in (DELIVER IN PERSON)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_partkey, l_quantity, l_extendedprice, l_discount, l_shipinstruct, l_shipmode], predicate=(l_shipmode@14 = AIR OR l_shipmode@14 = AIR REG) AND l_shipinstruct@13 = DELIVER IN PERSON AND (l_quantity@4 >= Some(800),11,2 AND l_quantity@4 <= Some(1800),11,2 OR l_quantity@4 >= Some(2000),11,2 AND l_quantity@4 <= Some(3000),11,2 OR l_quantity@4 >= Some(3000),11,2 AND l_quantity@4 <= Some(4000),11,2), pruning_predicate=(CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= AIR AND AIR <= l_shipmode_max@1 END OR CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= AIR REG AND AIR REG <= l_shipmode_max@1 END) AND CASE WHEN l_shipinstruct_null_count@6 = l_shipinstruct_row_count@7 THEN false ELSE l_shipinstruct_min@4 <= DELIVER IN PERSON AND DELIVER IN PERSON <= l_shipinstruct_max@5 END AND (CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(800),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(1800),11,2 END OR CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(2000),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(3000),11,2 END OR CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(3000),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(4000),11,2 END), required_guarantees=[l_shipmode in (AIR REG, AIR), l_shipinstruct in (DELIVER IN PERSON)] diff --git a/testdata/expected-plans/q2.txt b/testdata/expected-plans/q2.txt index 94a1eb2..e36f6ba 100644 --- 
a/testdata/expected-plans/q2.txt +++ b/testdata/expected-plans/q2.txt @@ -50,22 +50,22 @@ SortExec: TopK(fetch=100), expr=[s_acctbal@0 DESC,n_name@2 ASC NULLS LAST,s_name ProjectionExec: expr=[r_regionkey@0 as r_regionkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: r_name@1 = ASIA - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/region.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = ASIA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= ASIA AND ASIA <= r_name_max@1 END, required_guarantees=[r_name in (ASIA)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = ASIA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= ASIA AND ASIA <= r_name_max@1 END, required_guarantees=[r_name in (ASIA)] ProjectionExec: expr=[p_partkey@2 as p_partkey, p_mfgr@3 as p_mfgr, s_name@4 as s_name, s_address@5 as s_address, s_phone@6 as s_phone, s_acctbal@7 as s_acctbal, s_comment@8 as s_comment, ps_supplycost@9 as ps_supplycost, n_name@0 as n_name, n_regionkey@1 as n_regionkey] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@4)], projection=[n_name@1, n_regionkey@2, p_partkey@3, p_mfgr@4, s_name@5, s_address@6, s_phone@8, s_acctbal@9, s_comment@10, ps_supplycost@11] - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name, n_regionkey] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name, n_regionkey] ProjectionExec: expr=[p_partkey@6 as p_partkey, p_mfgr@7 as p_mfgr, s_name@0 as s_name, s_address@1 as s_address, s_nationkey@2 as s_nationkey, s_phone@3 as s_phone, 
s_acctbal@4 as s_acctbal, s_comment@5 as s_comment, ps_supplycost@8 as ps_supplycost] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@2)], projection=[s_name@1, s_address@2, s_nationkey@3, s_phone@4, s_acctbal@5, s_comment@6, p_partkey@7, p_mfgr@8, ps_supplycost@10] - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, ps_partkey@0)], projection=[p_partkey@0, p_mfgr@1, ps_suppkey@3, ps_supplycost@4] ProjectionExec: expr=[p_partkey@0 as p_partkey, p_mfgr@1 as p_mfgr] CoalesceBatchesExec: target_batch_size=8192 FilterExec: p_size@3 = 48 AND p_type@2 LIKE %TIN - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_mfgr, p_type, p_size], predicate=p_size@5 = 48 AND p_type@4 LIKE %TIN, pruning_predicate=CASE WHEN p_size_null_count@2 = p_size_row_count@3 THEN false ELSE p_size_min@0 <= 48 AND 48 <= p_size_max@1 END, required_guarantees=[p_size in (48)] - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/partsupp.parquet:0..41092331]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_mfgr, p_type, p_size], predicate=p_size@5 = 48 AND p_type@4 LIKE %TIN, pruning_predicate=CASE WHEN p_size_null_count@2 = p_size_row_count@3 THEN false ELSE p_size_min@0 <= 48 AND 48 <= p_size_max@1 END, required_guarantees=[p_size in (48)] + 
ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet:0..41092331]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] ProjectionExec: expr=[min(partsupp.ps_supplycost)@1 as min(partsupp.ps_supplycost), ps_partkey@0 as ps_partkey] AggregateExec: mode=Single, gby=[ps_partkey@0 as ps_partkey], aggr=[min(partsupp.ps_supplycost)] CoalesceBatchesExec: target_batch_size=8192 @@ -73,16 +73,16 @@ SortExec: TopK(fetch=100), expr=[s_acctbal@0 DESC,n_name@2 ASC NULLS LAST,s_name ProjectionExec: expr=[r_regionkey@0 as r_regionkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: r_name@1 = ASIA - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/region.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = ASIA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= ASIA AND ASIA <= r_name_max@1 END, required_guarantees=[r_name in (ASIA)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = ASIA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= ASIA AND ASIA <= r_name_max@1 END, required_guarantees=[r_name in (ASIA)] ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_supplycost@2 as ps_supplycost, n_regionkey@0 as n_regionkey] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[n_regionkey@1, ps_partkey@2, ps_supplycost@3] - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_regionkey] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_regionkey] ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_supplycost@2 as 
ps_supplycost, s_nationkey@0 as s_nationkey] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@1)], projection=[s_nationkey@1, ps_partkey@2, ps_supplycost@4] - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/partsupp.parquet:0..41092331]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet:0..41092331]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] RaySQL Plan =========== @@ -97,22 +97,22 @@ SortExec: TopK(fetch=100), expr=[s_acctbal@0 DESC,n_name@2 ASC NULLS LAST,s_name ProjectionExec: expr=[r_regionkey@0 as r_regionkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: r_name@1 = ASIA - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/region.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = ASIA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= ASIA AND ASIA <= r_name_max@1 END, required_guarantees=[r_name in (ASIA)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = ASIA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= ASIA AND ASIA <= r_name_max@1 END, required_guarantees=[r_name in (ASIA)] ProjectionExec: expr=[p_partkey@2 as p_partkey, p_mfgr@3 as p_mfgr, s_name@4 as s_name, s_address@5 as s_address, s_phone@6 as s_phone, s_acctbal@7 as s_acctbal, s_comment@8 as s_comment, ps_supplycost@9 
as ps_supplycost, n_name@0 as n_name, n_regionkey@1 as n_regionkey] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@4)], projection=[n_name@1, n_regionkey@2, p_partkey@3, p_mfgr@4, s_name@5, s_address@6, s_phone@8, s_acctbal@9, s_comment@10, ps_supplycost@11] - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name, n_regionkey] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name, n_regionkey] ProjectionExec: expr=[p_partkey@6 as p_partkey, p_mfgr@7 as p_mfgr, s_name@0 as s_name, s_address@1 as s_address, s_nationkey@2 as s_nationkey, s_phone@3 as s_phone, s_acctbal@4 as s_acctbal, s_comment@5 as s_comment, ps_supplycost@8 as ps_supplycost] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@2)], projection=[s_name@1, s_address@2, s_nationkey@3, s_phone@4, s_acctbal@5, s_comment@6, p_partkey@7, p_mfgr@8, ps_supplycost@10] - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, ps_partkey@0)], projection=[p_partkey@0, p_mfgr@1, ps_suppkey@3, ps_supplycost@4] ProjectionExec: expr=[p_partkey@0 as p_partkey, p_mfgr@1 as p_mfgr] CoalesceBatchesExec: target_batch_size=8192 FilterExec: p_size@3 = 48 AND p_type@2 LIKE %TIN - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/part.parquet]]}, 
projection=[p_partkey, p_mfgr, p_type, p_size], predicate=p_size@5 = 48 AND p_type@4 LIKE %TIN, pruning_predicate=CASE WHEN p_size_null_count@2 = p_size_row_count@3 THEN false ELSE p_size_min@0 <= 48 AND 48 <= p_size_max@1 END, required_guarantees=[p_size in (48)] - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/partsupp.parquet:0..41092331]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_mfgr, p_type, p_size], predicate=p_size@5 = 48 AND p_type@4 LIKE %TIN, pruning_predicate=CASE WHEN p_size_null_count@2 = p_size_row_count@3 THEN false ELSE p_size_min@0 <= 48 AND 48 <= p_size_max@1 END, required_guarantees=[p_size in (48)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet:0..41092331]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] ProjectionExec: expr=[min(partsupp.ps_supplycost)@1 as min(partsupp.ps_supplycost), ps_partkey@0 as ps_partkey] AggregateExec: mode=Single, gby=[ps_partkey@0 as ps_partkey], aggr=[min(partsupp.ps_supplycost)] CoalesceBatchesExec: target_batch_size=8192 @@ -120,14 +120,14 @@ SortExec: TopK(fetch=100), expr=[s_acctbal@0 DESC,n_name@2 ASC NULLS LAST,s_name ProjectionExec: expr=[r_regionkey@0 as r_regionkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: r_name@1 = ASIA - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/region.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = ASIA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= ASIA AND ASIA <= r_name_max@1 END, required_guarantees=[r_name in (ASIA)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = ASIA, pruning_predicate=CASE WHEN 
r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= ASIA AND ASIA <= r_name_max@1 END, required_guarantees=[r_name in (ASIA)] ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_supplycost@2 as ps_supplycost, n_regionkey@0 as n_regionkey] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[n_regionkey@1, ps_partkey@2, ps_supplycost@3] - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_regionkey] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_regionkey] ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_supplycost@2 as ps_supplycost, s_nationkey@0 as s_nationkey] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@1)], projection=[s_nationkey@1, ps_partkey@2, ps_supplycost@4] - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/partsupp.parquet:0..41092331]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet:0..41092331]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] diff --git a/testdata/expected-plans/q20.txt b/testdata/expected-plans/q20.txt index 49c0843..f2a87f6 100644 --- a/testdata/expected-plans/q20.txt +++ b/testdata/expected-plans/q20.txt @@ -37,8 +37,8 @@ SortExec: expr=[s_name@0 ASC NULLS LAST], preserve_partitioning=[false] ProjectionExec: expr=[n_nationkey@0 as n_nationkey] 
CoalesceBatchesExec: target_batch_size=8192 FilterExec: n_name@1 = KENYA - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = KENYA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= KENYA AND KENYA <= n_name_max@1 END, required_guarantees=[n_name in (KENYA)] - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_name, s_address, s_nationkey] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = KENYA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= KENYA AND KENYA <= n_name_max@1 END, required_guarantees=[n_name in (KENYA)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_name, s_address, s_nationkey] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(ps_partkey@0, l_partkey@1), (ps_suppkey@1, l_suppkey@2)], filter=CAST(ps_availqty@0 AS Float64) > Float64(0.5) * sum(lineitem.l_quantity)@1, projection=[ps_suppkey@1] CoalesceBatchesExec: target_batch_size=8192 @@ -46,14 +46,14 @@ SortExec: expr=[s_name@0 ASC NULLS LAST], preserve_partitioning=[false] ProjectionExec: expr=[p_partkey@0 as p_partkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: p_name@1 LIKE blanched% - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_name], predicate=p_name@1 LIKE blanched% - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/partsupp.parquet:0..41092331]]}, projection=[ps_partkey, ps_suppkey, ps_availqty] + ParquetExec: file_groups={1 group: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_name], predicate=p_name@1 LIKE blanched% + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet:0..41092331]]}, projection=[ps_partkey, ps_suppkey, ps_availqty] ProjectionExec: expr=[0.5 * CAST(sum(lineitem.l_quantity)@2 AS Float64) as Float64(0.5) * sum(lineitem.l_quantity), l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey] AggregateExec: mode=Single, gby=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey], aggr=[sum(lineitem.l_quantity)] ProjectionExec: expr=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey, l_quantity@2 as l_quantity] CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_shipdate@3 >= 1993-01-01 AND l_shipdate@3 < 1994-01-01 - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_partkey, l_suppkey, l_quantity, l_shipdate], predicate=l_shipdate@10 >= 1993-01-01 AND l_shipdate@10 < 1994-01-01, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1993-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1994-01-01 END, required_guarantees=[] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_partkey, l_suppkey, l_quantity, l_shipdate], predicate=l_shipdate@10 >= 1993-01-01 AND l_shipdate@10 < 1994-01-01, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1993-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1994-01-01 END, required_guarantees=[] RaySQL Plan =========== @@ -67,8 +67,8 @@ SortExec: expr=[s_name@0 ASC NULLS LAST], preserve_partitioning=[false] ProjectionExec: expr=[n_nationkey@0 as 
n_nationkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: n_name@1 = KENYA - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = KENYA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= KENYA AND KENYA <= n_name_max@1 END, required_guarantees=[n_name in (KENYA)] - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_name, s_address, s_nationkey] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = KENYA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= KENYA AND KENYA <= n_name_max@1 END, required_guarantees=[n_name in (KENYA)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_name, s_address, s_nationkey] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(ps_partkey@0, l_partkey@1), (ps_suppkey@1, l_suppkey@2)], filter=CAST(ps_availqty@0 AS Float64) > Float64(0.5) * sum(lineitem.l_quantity)@1, projection=[ps_suppkey@1] CoalesceBatchesExec: target_batch_size=8192 @@ -76,12 +76,12 @@ SortExec: expr=[s_name@0 ASC NULLS LAST], preserve_partitioning=[false] ProjectionExec: expr=[p_partkey@0 as p_partkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: p_name@1 LIKE blanched% - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_name], predicate=p_name@1 LIKE blanched% - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/partsupp.parquet:0..41092331]]}, projection=[ps_partkey, ps_suppkey, ps_availqty] + ParquetExec: file_groups={1 
group: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_name], predicate=p_name@1 LIKE blanched% + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet:0..41092331]]}, projection=[ps_partkey, ps_suppkey, ps_availqty] ProjectionExec: expr=[0.5 * CAST(sum(lineitem.l_quantity)@2 AS Float64) as Float64(0.5) * sum(lineitem.l_quantity), l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey] AggregateExec: mode=Single, gby=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey], aggr=[sum(lineitem.l_quantity)] ProjectionExec: expr=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey, l_quantity@2 as l_quantity] CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_shipdate@3 >= 1993-01-01 AND l_shipdate@3 < 1994-01-01 - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_partkey, l_suppkey, l_quantity, l_shipdate], predicate=l_shipdate@10 >= 1993-01-01 AND l_shipdate@10 < 1994-01-01, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1993-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1994-01-01 END, required_guarantees=[] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_partkey, l_suppkey, l_quantity, l_shipdate], predicate=l_shipdate@10 >= 1993-01-01 AND l_shipdate@10 < 1994-01-01, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1993-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1994-01-01 END, required_guarantees=[] diff --git a/testdata/expected-plans/q21.txt b/testdata/expected-plans/q21.txt index 3426574..1e7e3f5 100644 --- a/testdata/expected-plans/q21.txt +++ 
b/testdata/expected-plans/q21.txt @@ -49,25 +49,25 @@ SortExec: TopK(fetch=100), expr=[numwait@1 DESC,s_name@0 ASC NULLS LAST], preser ProjectionExec: expr=[n_nationkey@0 as n_nationkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: n_name@1 = ARGENTINA - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ARGENTINA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ARGENTINA AND ARGENTINA <= n_name_max@1 END, required_guarantees=[n_name in (ARGENTINA)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ARGENTINA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ARGENTINA AND ARGENTINA <= n_name_max@1 END, required_guarantees=[n_name in (ARGENTINA)] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(o_orderkey@0, l_orderkey@2)], projection=[s_name@1, s_nationkey@2, l_orderkey@3, l_suppkey@4] ProjectionExec: expr=[o_orderkey@0 as o_orderkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: o_orderstatus@1 = F - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_orderstatus], predicate=o_orderstatus@2 = F, pruning_predicate=CASE WHEN o_orderstatus_null_count@2 = o_orderstatus_row_count@3 THEN false ELSE o_orderstatus_min@0 <= F AND F <= o_orderstatus_max@1 END, required_guarantees=[o_orderstatus in (F)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_orderstatus], predicate=o_orderstatus@2 = F, pruning_predicate=CASE WHEN o_orderstatus_null_count@2 = o_orderstatus_row_count@3 THEN false ELSE 
o_orderstatus_min@0 <= F AND F <= o_orderstatus_max@1 END, required_guarantees=[o_orderstatus in (F)] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1)], projection=[s_name@1, s_nationkey@2, l_orderkey@3, l_suppkey@4] - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_name, s_nationkey] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_name, s_nationkey] ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_suppkey@1 as l_suppkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_receiptdate@3 > l_commitdate@2 - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_suppkey] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_suppkey] ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_suppkey@1 as l_suppkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_receiptdate@3 > l_commitdate@2 - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 + ParquetExec: file_groups={1 group: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 RaySQL Plan =========== @@ -85,23 +85,23 @@ SortExec: TopK(fetch=100), expr=[numwait@1 DESC,s_name@0 ASC NULLS LAST], preser ProjectionExec: expr=[n_nationkey@0 as n_nationkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: n_name@1 = ARGENTINA - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ARGENTINA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ARGENTINA AND ARGENTINA <= n_name_max@1 END, required_guarantees=[n_name in (ARGENTINA)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ARGENTINA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ARGENTINA AND ARGENTINA <= n_name_max@1 END, required_guarantees=[n_name in (ARGENTINA)] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(o_orderkey@0, l_orderkey@2)], projection=[s_name@1, s_nationkey@2, l_orderkey@3, l_suppkey@4] ProjectionExec: expr=[o_orderkey@0 as o_orderkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: o_orderstatus@1 = F - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_orderstatus], predicate=o_orderstatus@2 = F, pruning_predicate=CASE WHEN o_orderstatus_null_count@2 = o_orderstatus_row_count@3 THEN false ELSE o_orderstatus_min@0 <= F AND F <= o_orderstatus_max@1 END, required_guarantees=[o_orderstatus in (F)] + ParquetExec: file_groups={1 group: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_orderstatus], predicate=o_orderstatus@2 = F, pruning_predicate=CASE WHEN o_orderstatus_null_count@2 = o_orderstatus_row_count@3 THEN false ELSE o_orderstatus_min@0 <= F AND F <= o_orderstatus_max@1 END, required_guarantees=[o_orderstatus in (F)] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1)], projection=[s_name@1, s_nationkey@2, l_orderkey@3, l_suppkey@4] - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_name, s_nationkey] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_name, s_nationkey] ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_suppkey@1 as l_suppkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_receiptdate@3 > l_commitdate@2 - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_suppkey] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_suppkey] ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_suppkey@1 as l_suppkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_receiptdate@3 > l_commitdate@2 - ParquetExec: file_groups={1 group: 
[[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 diff --git a/testdata/expected-plans/q22.txt b/testdata/expected-plans/q22.txt index 80433ec..3236874 100644 --- a/testdata/expected-plans/q22.txt +++ b/testdata/expected-plans/q22.txt @@ -31,13 +31,13 @@ SortExec: expr=[cntrycode@0 ASC NULLS LAST], preserve_partitioning=[false] ProjectionExec: expr=[c_acctbal@1 as c_acctbal] CoalesceBatchesExec: target_batch_size=8192 FilterExec: c_acctbal@1 > Some(0),11,2 AND Use substr(c_phone@0, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/customer.parquet:0..13212491]]}, projection=[c_phone, c_acctbal], predicate=c_acctbal@5 > Some(0),11,2 AND Use substr(c_phone@4, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) AND c_acctbal@5 > Some(0),11,2, pruning_predicate=CASE WHEN c_acctbal_null_count@1 = c_acctbal_row_count@2 THEN false ELSE c_acctbal_max@0 > Some(0),11,2 END AND CASE WHEN c_acctbal_null_count@1 = c_acctbal_row_count@2 THEN false ELSE c_acctbal_max@0 > Some(0),11,2 END, required_guarantees=[] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13212491]]}, projection=[c_phone, c_acctbal], 
predicate=c_acctbal@5 > Some(0),11,2 AND Use substr(c_phone@4, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) AND c_acctbal@5 > Some(0),11,2, pruning_predicate=CASE WHEN c_acctbal_null_count@1 = c_acctbal_row_count@2 THEN false ELSE c_acctbal_max@0 > Some(0),11,2 END AND CASE WHEN c_acctbal_null_count@1 = c_acctbal_row_count@2 THEN false ELSE c_acctbal_max@0 > Some(0),11,2 END, required_guarantees=[] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=LeftAnti, on=[(c_custkey@0, o_custkey@0)], projection=[c_phone@1, c_acctbal@2] CoalesceBatchesExec: target_batch_size=8192 FilterExec: Use substr(c_phone@1, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/customer.parquet:0..13212491]]}, projection=[c_custkey, c_phone, c_acctbal], predicate=Use substr(c_phone@4, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_custkey] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13212491]]}, projection=[c_custkey, c_phone, c_acctbal], predicate=Use substr(c_phone@4, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: 
Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_custkey] RaySQL Plan =========== @@ -52,11 +52,11 @@ SortExec: expr=[cntrycode@0 ASC NULLS LAST], preserve_partitioning=[false] ProjectionExec: expr=[c_acctbal@1 as c_acctbal] CoalesceBatchesExec: target_batch_size=8192 FilterExec: c_acctbal@1 > Some(0),11,2 AND Use substr(c_phone@0, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/customer.parquet:0..13212491]]}, projection=[c_phone, c_acctbal], predicate=c_acctbal@5 > Some(0),11,2 AND Use substr(c_phone@4, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) AND c_acctbal@5 > Some(0),11,2, pruning_predicate=CASE WHEN c_acctbal_null_count@1 = c_acctbal_row_count@2 THEN false ELSE c_acctbal_max@0 > Some(0),11,2 END AND CASE WHEN c_acctbal_null_count@1 = c_acctbal_row_count@2 THEN false ELSE c_acctbal_max@0 > Some(0),11,2 END, required_guarantees=[] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13212491]]}, projection=[c_phone, c_acctbal], predicate=c_acctbal@5 > Some(0),11,2 AND Use substr(c_phone@4, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) AND c_acctbal@5 > Some(0),11,2, pruning_predicate=CASE WHEN 
c_acctbal_null_count@1 = c_acctbal_row_count@2 THEN false ELSE c_acctbal_max@0 > Some(0),11,2 END AND CASE WHEN c_acctbal_null_count@1 = c_acctbal_row_count@2 THEN false ELSE c_acctbal_max@0 > Some(0),11,2 END, required_guarantees=[] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=LeftAnti, on=[(c_custkey@0, o_custkey@0)], projection=[c_phone@1, c_acctbal@2] CoalesceBatchesExec: target_batch_size=8192 FilterExec: Use substr(c_phone@1, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/customer.parquet:0..13212491]]}, projection=[c_custkey, c_phone, c_acctbal], predicate=Use substr(c_phone@4, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_custkey] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13212491]]}, projection=[c_custkey, c_phone, c_acctbal], predicate=Use substr(c_phone@4, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_custkey] diff --git a/testdata/expected-plans/q3.txt b/testdata/expected-plans/q3.txt index 5161cdd..5e528af 100644 --- 
a/testdata/expected-plans/q3.txt +++ b/testdata/expected-plans/q3.txt @@ -31,14 +31,14 @@ SortExec: TopK(fetch=10), expr=[revenue@1 DESC,o_orderdate@2 ASC NULLS LAST], pr ProjectionExec: expr=[c_custkey@0 as c_custkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: c_mktsegment@1 = BUILDING - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/customer.parquet:0..13212491]]}, projection=[c_custkey, c_mktsegment], predicate=c_mktsegment@6 = BUILDING, pruning_predicate=CASE WHEN c_mktsegment_null_count@2 = c_mktsegment_row_count@3 THEN false ELSE c_mktsegment_min@0 <= BUILDING AND BUILDING <= c_mktsegment_max@1 END, required_guarantees=[c_mktsegment in (BUILDING)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13212491]]}, projection=[c_custkey, c_mktsegment], predicate=c_mktsegment@6 = BUILDING, pruning_predicate=CASE WHEN c_mktsegment_null_count@2 = c_mktsegment_row_count@3 THEN false ELSE c_mktsegment_min@0 <= BUILDING AND BUILDING <= c_mktsegment_max@1 END, required_guarantees=[c_mktsegment in (BUILDING)] CoalesceBatchesExec: target_batch_size=8192 FilterExec: o_orderdate@2 < 1995-03-15 - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_custkey, o_orderdate, o_shippriority], predicate=o_orderdate@4 < 1995-03-15, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@0 < 1995-03-15 END, required_guarantees=[] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_custkey, o_orderdate, o_shippriority], predicate=o_orderdate@4 < 1995-03-15, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@0 < 1995-03-15 END, required_guarantees=[] ProjectionExec: expr=[l_orderkey@0 as 
l_orderkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_shipdate@3 > 1995-03-15 - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 > 1995-03-15, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 > 1995-03-15 END, required_guarantees=[] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 > 1995-03-15, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 > 1995-03-15 END, required_guarantees=[] RaySQL Plan =========== @@ -54,12 +54,12 @@ SortExec: TopK(fetch=10), expr=[revenue@1 DESC,o_orderdate@2 ASC NULLS LAST], pr ProjectionExec: expr=[c_custkey@0 as c_custkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: c_mktsegment@1 = BUILDING - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/customer.parquet:0..13212491]]}, projection=[c_custkey, c_mktsegment], predicate=c_mktsegment@6 = BUILDING, pruning_predicate=CASE WHEN c_mktsegment_null_count@2 = c_mktsegment_row_count@3 THEN false ELSE c_mktsegment_min@0 <= BUILDING AND BUILDING <= c_mktsegment_max@1 END, required_guarantees=[c_mktsegment in (BUILDING)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13212491]]}, projection=[c_custkey, c_mktsegment], predicate=c_mktsegment@6 = BUILDING, pruning_predicate=CASE WHEN c_mktsegment_null_count@2 = c_mktsegment_row_count@3 THEN false ELSE c_mktsegment_min@0 <= BUILDING AND BUILDING <= c_mktsegment_max@1 END, required_guarantees=[c_mktsegment in (BUILDING)] 
CoalesceBatchesExec: target_batch_size=8192 FilterExec: o_orderdate@2 < 1995-03-15 - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_custkey, o_orderdate, o_shippriority], predicate=o_orderdate@4 < 1995-03-15, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@0 < 1995-03-15 END, required_guarantees=[] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_custkey, o_orderdate, o_shippriority], predicate=o_orderdate@4 < 1995-03-15, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@0 < 1995-03-15 END, required_guarantees=[] ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_shipdate@3 > 1995-03-15 - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 > 1995-03-15, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 > 1995-03-15 END, required_guarantees=[] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 > 1995-03-15, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 > 1995-03-15 END, required_guarantees=[] diff --git a/testdata/expected-plans/q4.txt b/testdata/expected-plans/q4.txt index e4ae1e3..3a99247 100644 --- a/testdata/expected-plans/q4.txt +++ b/testdata/expected-plans/q4.txt @@ -25,11 +25,11 @@ SortExec: 
expr=[o_orderpriority@0 ASC NULLS LAST], preserve_partitioning=[false] ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_orderpriority@2 as o_orderpriority] CoalesceBatchesExec: target_batch_size=8192 FilterExec: o_orderdate@1 >= 1995-04-01 AND o_orderdate@1 < 1995-07-01 - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_orderdate, o_orderpriority], predicate=o_orderdate@4 >= 1995-04-01 AND o_orderdate@4 < 1995-07-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1995-04-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1995-07-01 END, required_guarantees=[] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_orderdate, o_orderpriority], predicate=o_orderdate@4 >= 1995-04-01 AND o_orderdate@4 < 1995-07-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1995-04-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1995-07-01 END, required_guarantees=[] ProjectionExec: expr=[l_orderkey@0 as l_orderkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_receiptdate@2 > l_commitdate@1 - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 RaySQL Plan =========== @@ -43,9 +43,9 @@ SortExec: expr=[o_orderpriority@0 ASC NULLS LAST], 
preserve_partitioning=[false] ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_orderpriority@2 as o_orderpriority] CoalesceBatchesExec: target_batch_size=8192 FilterExec: o_orderdate@1 >= 1995-04-01 AND o_orderdate@1 < 1995-07-01 - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_orderdate, o_orderpriority], predicate=o_orderdate@4 >= 1995-04-01 AND o_orderdate@4 < 1995-07-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1995-04-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1995-07-01 END, required_guarantees=[] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_orderdate, o_orderpriority], predicate=o_orderdate@4 >= 1995-04-01 AND o_orderdate@4 < 1995-07-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1995-04-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1995-07-01 END, required_guarantees=[] ProjectionExec: expr=[l_orderkey@0 as l_orderkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_receiptdate@2 > l_commitdate@1 - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 diff --git a/testdata/expected-plans/q5.txt b/testdata/expected-plans/q5.txt index f5c9022..2423a97 100644 --- a/testdata/expected-plans/q5.txt +++ 
b/testdata/expected-plans/q5.txt @@ -36,25 +36,25 @@ SortExec: expr=[revenue@1 DESC], preserve_partitioning=[false] ProjectionExec: expr=[r_regionkey@0 as r_regionkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: r_name@1 = AFRICA - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/region.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = AFRICA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= AFRICA AND AFRICA <= r_name_max@1 END, required_guarantees=[r_name in (AFRICA)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = AFRICA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= AFRICA AND AFRICA <= r_name_max@1 END, required_guarantees=[r_name in (AFRICA)] ProjectionExec: expr=[l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, n_name@0 as n_name, n_regionkey@1 as n_regionkey] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[n_name@1, n_regionkey@2, l_extendedprice@3, l_discount@4] - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name, n_regionkey] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name, n_regionkey] ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@0 as s_nationkey] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1), (s_nationkey@1, c_nationkey@0)], projection=[s_nationkey@1, l_extendedprice@4, l_discount@5] - ParquetExec: file_groups={1 group: 
[[home/edmondo/Development/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(o_orderkey@1, l_orderkey@0)], projection=[c_nationkey@0, l_suppkey@3, l_extendedprice@4, l_discount@5] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_nationkey@1, o_orderkey@2] - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/customer.parquet:0..13212491]]}, projection=[c_custkey, c_nationkey] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13212491]]}, projection=[c_custkey, c_nationkey] ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: o_orderdate@2 >= 1994-01-01 AND o_orderdate@2 < 1995-01-01 - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1994-01-01 AND o_orderdate@4 < 1995-01-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1994-01-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1995-01-01 END, required_guarantees=[] - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_custkey, o_orderdate], 
predicate=o_orderdate@4 >= 1994-01-01 AND o_orderdate@4 < 1995-01-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1994-01-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1995-01-01 END, required_guarantees=[] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount] RaySQL Plan =========== @@ -68,23 +68,23 @@ SortExec: expr=[revenue@1 DESC], preserve_partitioning=[false] ProjectionExec: expr=[r_regionkey@0 as r_regionkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: r_name@1 = AFRICA - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/region.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = AFRICA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= AFRICA AND AFRICA <= r_name_max@1 END, required_guarantees=[r_name in (AFRICA)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = AFRICA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= AFRICA AND AFRICA <= r_name_max@1 END, required_guarantees=[r_name in (AFRICA)] ProjectionExec: expr=[l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, n_name@0 as n_name, n_regionkey@1 as n_regionkey] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[n_name@1, n_regionkey@2, l_extendedprice@3, l_discount@4] - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name, n_regionkey] + ParquetExec: file_groups={1 group: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name, n_regionkey] ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@0 as s_nationkey] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1), (s_nationkey@1, c_nationkey@0)], projection=[s_nationkey@1, l_extendedprice@4, l_discount@5] - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(o_orderkey@1, l_orderkey@0)], projection=[c_nationkey@0, l_suppkey@3, l_extendedprice@4, l_discount@5] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_nationkey@1, o_orderkey@2] - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/customer.parquet:0..13212491]]}, projection=[c_custkey, c_nationkey] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13212491]]}, projection=[c_custkey, c_nationkey] ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: o_orderdate@2 >= 1994-01-01 AND o_orderdate@2 < 1995-01-01 - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1994-01-01 AND o_orderdate@4 < 1995-01-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1994-01-01 END AND CASE WHEN 
o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1995-01-01 END, required_guarantees=[] - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1994-01-01 AND o_orderdate@4 < 1995-01-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1994-01-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1995-01-01 END, required_guarantees=[] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount] diff --git a/testdata/expected-plans/q6.txt b/testdata/expected-plans/q6.txt index 50f2026..88c45cb 100644 --- a/testdata/expected-plans/q6.txt +++ b/testdata/expected-plans/q6.txt @@ -15,7 +15,7 @@ ProjectionExec: expr=[sum(lineitem.l_extendedprice * lineitem.l_discount)@0 as r ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_shipdate@3 >= 1994-01-01 AND l_shipdate@3 < 1995-01-01 AND l_discount@2 >= Some(3),11,2 AND l_discount@2 <= Some(5),11,2 AND l_quantity@0 < Some(2400),11,2 - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_quantity, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1994-01-01 AND l_shipdate@10 < 1995-01-01 AND l_discount@6 >= Some(3),11,2 AND l_discount@6 <= Some(5),11,2 AND l_quantity@4 < Some(2400),11,2, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = 
l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1994-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1995-01-01 END AND CASE WHEN l_discount_null_count@5 = l_discount_row_count@6 THEN false ELSE l_discount_max@4 >= Some(3),11,2 END AND CASE WHEN l_discount_null_count@5 = l_discount_row_count@6 THEN false ELSE l_discount_min@7 <= Some(5),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@8 < Some(2400),11,2 END, required_guarantees=[] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_quantity, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1994-01-01 AND l_shipdate@10 < 1995-01-01 AND l_discount@6 >= Some(3),11,2 AND l_discount@6 <= Some(5),11,2 AND l_quantity@4 < Some(2400),11,2, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1994-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1995-01-01 END AND CASE WHEN l_discount_null_count@5 = l_discount_row_count@6 THEN false ELSE l_discount_max@4 >= Some(3),11,2 END AND CASE WHEN l_discount_null_count@5 = l_discount_row_count@6 THEN false ELSE l_discount_min@7 <= Some(5),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@8 < Some(2400),11,2 END, required_guarantees=[] RaySQL Plan =========== @@ -26,5 +26,5 @@ ProjectionExec: expr=[sum(lineitem.l_extendedprice * lineitem.l_discount)@0 as r ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_shipdate@3 >= 1994-01-01 AND l_shipdate@3 < 1995-01-01 AND l_discount@2 >= Some(3),11,2 AND l_discount@2 <= Some(5),11,2 AND l_quantity@0 < Some(2400),11,2 - ParquetExec: file_groups={1 
group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_quantity, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1994-01-01 AND l_shipdate@10 < 1995-01-01 AND l_discount@6 >= Some(3),11,2 AND l_discount@6 <= Some(5),11,2 AND l_quantity@4 < Some(2400),11,2, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1994-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1995-01-01 END AND CASE WHEN l_discount_null_count@5 = l_discount_row_count@6 THEN false ELSE l_discount_max@4 >= Some(3),11,2 END AND CASE WHEN l_discount_null_count@5 = l_discount_row_count@6 THEN false ELSE l_discount_min@7 <= Some(5),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@8 < Some(2400),11,2 END, required_guarantees=[] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_quantity, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1994-01-01 AND l_shipdate@10 < 1995-01-01 AND l_discount@6 >= Some(3),11,2 AND l_discount@6 <= Some(5),11,2 AND l_quantity@4 < Some(2400),11,2, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1994-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1995-01-01 END AND CASE WHEN l_discount_null_count@5 = l_discount_row_count@6 THEN false ELSE l_discount_max@4 >= Some(3),11,2 END AND CASE WHEN l_discount_null_count@5 = l_discount_row_count@6 THEN false ELSE l_discount_min@7 <= Some(5),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@8 < Some(2400),11,2 END, required_guarantees=[] diff --git a/testdata/expected-plans/q7.txt b/testdata/expected-plans/q7.txt 
index 6d64219..190dad4 100644 --- a/testdata/expected-plans/q7.txt +++ b/testdata/expected-plans/q7.txt @@ -38,26 +38,26 @@ SortExec: expr=[supp_nation@0 ASC NULLS LAST,cust_nation@1 ASC NULLS LAST,l_year HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, c_nationkey@3)], filter=n_name@0 = GERMANY AND n_name@1 = IRAQ OR n_name@0 = IRAQ AND n_name@1 = GERMANY, projection=[n_name@1, l_extendedprice@2, l_discount@3, l_shipdate@4, n_name@6] CoalesceBatchesExec: target_batch_size=8192 FilterExec: n_name@1 = IRAQ OR n_name@1 = GERMANY - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = IRAQ OR n_name@1 = GERMANY, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= IRAQ AND IRAQ <= n_name_max@1 END OR CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= GERMANY AND GERMANY <= n_name_max@1 END, required_guarantees=[n_name in (GERMANY, IRAQ)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = IRAQ OR n_name@1 = GERMANY, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= IRAQ AND IRAQ <= n_name_max@1 END OR CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= GERMANY AND GERMANY <= n_name_max@1 END, required_guarantees=[n_name in (GERMANY, IRAQ)] ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, l_shipdate@3 as l_shipdate, c_nationkey@4 as c_nationkey, n_name@0 as n_name] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@0)], projection=[n_name@1, l_extendedprice@3, l_discount@4, l_shipdate@5, c_nationkey@6] CoalesceBatchesExec: target_batch_size=8192 FilterExec: n_name@1 = 
GERMANY OR n_name@1 = IRAQ - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = GERMANY OR n_name@1 = IRAQ, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= GERMANY AND GERMANY <= n_name_max@1 END OR CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= IRAQ AND IRAQ <= n_name_max@1 END, required_guarantees=[n_name in (GERMANY, IRAQ)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = GERMANY OR n_name@1 = IRAQ, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= GERMANY AND GERMANY <= n_name_max@1 END OR CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= IRAQ AND IRAQ <= n_name_max@1 END, required_guarantees=[n_name in (GERMANY, IRAQ)] ProjectionExec: expr=[s_nationkey@1 as s_nationkey, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, l_shipdate@4 as l_shipdate, c_nationkey@0 as c_nationkey] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(c_custkey@0, o_custkey@4)], projection=[c_nationkey@1, s_nationkey@2, l_extendedprice@3, l_discount@4, l_shipdate@5] - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/customer.parquet:0..13212491]]}, projection=[c_custkey, c_nationkey] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13212491]]}, projection=[c_custkey, c_nationkey] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(l_orderkey@1, o_orderkey@0)], projection=[s_nationkey@0, l_extendedprice@2, l_discount@3, l_shipdate@4, o_custkey@6] CoalesceBatchesExec: target_batch_size=8192 
HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1)], projection=[s_nationkey@1, l_orderkey@2, l_extendedprice@4, l_discount@5, l_shipdate@6] - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_shipdate@4 >= 1995-01-01 AND l_shipdate@4 <= 1996-12-31 - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1995-01-01 AND l_shipdate@10 <= 1996-12-31, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1995-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 <= 1996-12-31 END, required_guarantees=[] - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_custkey] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1995-01-01 AND l_shipdate@10 <= 1996-12-31, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1995-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 <= 1996-12-31 END, required_guarantees=[] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_custkey] RaySQL Plan =========== @@ -71,24 +71,24 @@ 
SortExec: expr=[supp_nation@0 ASC NULLS LAST,cust_nation@1 ASC NULLS LAST,l_year HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, c_nationkey@3)], filter=n_name@0 = GERMANY AND n_name@1 = IRAQ OR n_name@0 = IRAQ AND n_name@1 = GERMANY, projection=[n_name@1, l_extendedprice@2, l_discount@3, l_shipdate@4, n_name@6] CoalesceBatchesExec: target_batch_size=8192 FilterExec: n_name@1 = IRAQ OR n_name@1 = GERMANY - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = IRAQ OR n_name@1 = GERMANY, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= IRAQ AND IRAQ <= n_name_max@1 END OR CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= GERMANY AND GERMANY <= n_name_max@1 END, required_guarantees=[n_name in (GERMANY, IRAQ)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = IRAQ OR n_name@1 = GERMANY, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= IRAQ AND IRAQ <= n_name_max@1 END OR CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= GERMANY AND GERMANY <= n_name_max@1 END, required_guarantees=[n_name in (GERMANY, IRAQ)] ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, l_shipdate@3 as l_shipdate, c_nationkey@4 as c_nationkey, n_name@0 as n_name] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@0)], projection=[n_name@1, l_extendedprice@3, l_discount@4, l_shipdate@5, c_nationkey@6] CoalesceBatchesExec: target_batch_size=8192 FilterExec: n_name@1 = GERMANY OR n_name@1 = IRAQ - ParquetExec: file_groups={1 group: 
[[home/edmondo/Development/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = GERMANY OR n_name@1 = IRAQ, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= GERMANY AND GERMANY <= n_name_max@1 END OR CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= IRAQ AND IRAQ <= n_name_max@1 END, required_guarantees=[n_name in (GERMANY, IRAQ)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = GERMANY OR n_name@1 = IRAQ, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= GERMANY AND GERMANY <= n_name_max@1 END OR CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= IRAQ AND IRAQ <= n_name_max@1 END, required_guarantees=[n_name in (GERMANY, IRAQ)] ProjectionExec: expr=[s_nationkey@1 as s_nationkey, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, l_shipdate@4 as l_shipdate, c_nationkey@0 as c_nationkey] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(c_custkey@0, o_custkey@4)], projection=[c_nationkey@1, s_nationkey@2, l_extendedprice@3, l_discount@4, l_shipdate@5] - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/customer.parquet:0..13212491]]}, projection=[c_custkey, c_nationkey] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13212491]]}, projection=[c_custkey, c_nationkey] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(l_orderkey@1, o_orderkey@0)], projection=[s_nationkey@0, l_extendedprice@2, l_discount@3, l_shipdate@4, o_custkey@6] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, 
l_suppkey@1)], projection=[s_nationkey@1, l_orderkey@2, l_extendedprice@4, l_discount@5, l_shipdate@6] - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_shipdate@4 >= 1995-01-01 AND l_shipdate@4 <= 1996-12-31 - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1995-01-01 AND l_shipdate@10 <= 1996-12-31, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1995-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 <= 1996-12-31 END, required_guarantees=[] - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_custkey] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1995-01-01 AND l_shipdate@10 <= 1996-12-31, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1995-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 <= 1996-12-31 END, required_guarantees=[] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_custkey] diff --git a/testdata/expected-plans/q8.txt b/testdata/expected-plans/q8.txt index e83a9c1..ddd66ff 100644 --- 
a/testdata/expected-plans/q8.txt +++ b/testdata/expected-plans/q8.txt @@ -47,36 +47,36 @@ SortExec: expr=[o_year@0 ASC NULLS LAST], preserve_partitioning=[false] ProjectionExec: expr=[r_regionkey@0 as r_regionkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: r_name@1 = MIDDLE EAST - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/region.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = MIDDLE EAST, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= MIDDLE EAST AND MIDDLE EAST <= r_name_max@1 END, required_guarantees=[r_name in (MIDDLE EAST)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = MIDDLE EAST, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= MIDDLE EAST AND MIDDLE EAST <= r_name_max@1 END, required_guarantees=[r_name in (MIDDLE EAST)] ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, o_orderdate@3 as o_orderdate, n_regionkey@4 as n_regionkey, n_name@0 as n_name] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[n_name@1, l_extendedprice@2, l_discount@3, o_orderdate@5, n_regionkey@6] - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name] ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@3 as s_nationkey, o_orderdate@4 as o_orderdate, n_regionkey@0 as n_regionkey] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, 
c_nationkey@4)], projection=[n_regionkey@1, l_extendedprice@2, l_discount@3, s_nationkey@4, o_orderdate@5] - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_regionkey] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_regionkey] ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@3 as s_nationkey, o_orderdate@4 as o_orderdate, c_nationkey@0 as c_nationkey] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(c_custkey@0, o_custkey@3)], projection=[c_nationkey@1, l_extendedprice@2, l_discount@3, s_nationkey@4, o_orderdate@6] - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/customer.parquet:0..13212491]]}, projection=[c_custkey, c_nationkey] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13212491]]}, projection=[c_custkey, c_nationkey] ProjectionExec: expr=[l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@4 as s_nationkey, o_custkey@0 as o_custkey, o_orderdate@1 as o_orderdate] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_custkey@1, o_orderdate@2, l_extendedprice@4, l_discount@5, s_nationkey@6] CoalesceBatchesExec: target_batch_size=8192 FilterExec: o_orderdate@2 >= 1995-01-01 AND o_orderdate@2 <= 1996-12-31 - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1995-01-01 AND o_orderdate@4 <= 1996-12-31, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1995-01-01 END AND CASE WHEN 
o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 <= 1996-12-31 END, required_guarantees=[] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1995-01-01 AND o_orderdate@4 <= 1996-12-31, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1995-01-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 <= 1996-12-31 END, required_guarantees=[] ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@0 as s_nationkey] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1)], projection=[s_nationkey@1, l_orderkey@2, l_extendedprice@4, l_discount@5] - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, l_partkey@1)], projection=[l_orderkey@1, l_suppkey@3, l_extendedprice@4, l_discount@5] ProjectionExec: expr=[p_partkey@0 as p_partkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: p_type@1 = LARGE PLATED STEEL - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_type], predicate=p_type@4 = LARGE PLATED STEEL, pruning_predicate=CASE WHEN p_type_null_count@2 = p_type_row_count@3 THEN false ELSE p_type_min@0 <= LARGE PLATED STEEL AND LARGE PLATED STEEL <= p_type_max@1 END, required_guarantees=[p_type in (LARGE PLATED STEEL)] 
- ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_partkey, l_suppkey, l_extendedprice, l_discount] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_type], predicate=p_type@4 = LARGE PLATED STEEL, pruning_predicate=CASE WHEN p_type_null_count@2 = p_type_row_count@3 THEN false ELSE p_type_min@0 <= LARGE PLATED STEEL AND LARGE PLATED STEEL <= p_type_max@1 END, required_guarantees=[p_type in (LARGE PLATED STEEL)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_partkey, l_suppkey, l_extendedprice, l_discount] RaySQL Plan =========== @@ -91,34 +91,34 @@ SortExec: expr=[o_year@0 ASC NULLS LAST], preserve_partitioning=[false] ProjectionExec: expr=[r_regionkey@0 as r_regionkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: r_name@1 = MIDDLE EAST - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/region.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = MIDDLE EAST, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= MIDDLE EAST AND MIDDLE EAST <= r_name_max@1 END, required_guarantees=[r_name in (MIDDLE EAST)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = MIDDLE EAST, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= MIDDLE EAST AND MIDDLE EAST <= r_name_max@1 END, required_guarantees=[r_name in (MIDDLE EAST)] ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, o_orderdate@3 as o_orderdate, n_regionkey@4 as n_regionkey, n_name@0 as n_name] CoalesceBatchesExec: target_batch_size=8192 
HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[n_name@1, l_extendedprice@2, l_discount@3, o_orderdate@5, n_regionkey@6] - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name] ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@3 as s_nationkey, o_orderdate@4 as o_orderdate, n_regionkey@0 as n_regionkey] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, c_nationkey@4)], projection=[n_regionkey@1, l_extendedprice@2, l_discount@3, s_nationkey@4, o_orderdate@5] - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_regionkey] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_regionkey] ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@3 as s_nationkey, o_orderdate@4 as o_orderdate, c_nationkey@0 as c_nationkey] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(c_custkey@0, o_custkey@3)], projection=[c_nationkey@1, l_extendedprice@2, l_discount@3, s_nationkey@4, o_orderdate@6] - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/customer.parquet:0..13212491]]}, projection=[c_custkey, c_nationkey] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13212491]]}, projection=[c_custkey, c_nationkey] ProjectionExec: expr=[l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@4 as s_nationkey, o_custkey@0 as o_custkey, 
o_orderdate@1 as o_orderdate] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_custkey@1, o_orderdate@2, l_extendedprice@4, l_discount@5, s_nationkey@6] CoalesceBatchesExec: target_batch_size=8192 FilterExec: o_orderdate@2 >= 1995-01-01 AND o_orderdate@2 <= 1996-12-31 - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1995-01-01 AND o_orderdate@4 <= 1996-12-31, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1995-01-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 <= 1996-12-31 END, required_guarantees=[] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1995-01-01 AND o_orderdate@4 <= 1996-12-31, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1995-01-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 <= 1996-12-31 END, required_guarantees=[] ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@0 as s_nationkey] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1)], projection=[s_nationkey@1, l_orderkey@2, l_extendedprice@4, l_discount@5] - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, 
projection=[s_suppkey, s_nationkey] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, l_partkey@1)], projection=[l_orderkey@1, l_suppkey@3, l_extendedprice@4, l_discount@5] ProjectionExec: expr=[p_partkey@0 as p_partkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: p_type@1 = LARGE PLATED STEEL - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_type], predicate=p_type@4 = LARGE PLATED STEEL, pruning_predicate=CASE WHEN p_type_null_count@2 = p_type_row_count@3 THEN false ELSE p_type_min@0 <= LARGE PLATED STEEL AND LARGE PLATED STEEL <= p_type_max@1 END, required_guarantees=[p_type in (LARGE PLATED STEEL)] - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_partkey, l_suppkey, l_extendedprice, l_discount] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_type], predicate=p_type@4 = LARGE PLATED STEEL, pruning_predicate=CASE WHEN p_type_null_count@2 = p_type_row_count@3 THEN false ELSE p_type_min@0 <= LARGE PLATED STEEL AND LARGE PLATED STEEL <= p_type_max@1 END, required_guarantees=[p_type in (LARGE PLATED STEEL)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_partkey, l_suppkey, l_extendedprice, l_discount] diff --git a/testdata/expected-plans/q9.txt b/testdata/expected-plans/q9.txt index 203a47e..ca953ca 100644 --- a/testdata/expected-plans/q9.txt +++ b/testdata/expected-plans/q9.txt @@ -33,26 +33,26 @@ SortExec: expr=[nation@0 ASC NULLS LAST,o_year@1 DESC], preserve_partitioning=[f ProjectionExec: expr=[n_name@0 as nation, date_part(YEAR, o_orderdate@5) as o_year, l_extendedprice@2 * (Some(1),20,0 - l_discount@3) - ps_supplycost@4 * l_quantity@1 
as amount] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@3)], projection=[n_name@1, l_quantity@2, l_extendedprice@3, l_discount@4, ps_supplycost@6, o_orderdate@7] - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name] ProjectionExec: expr=[l_quantity@1 as l_quantity, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@4 as s_nationkey, ps_supplycost@5 as ps_supplycost, o_orderdate@0 as o_orderdate] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_orderdate@1, l_quantity@3, l_extendedprice@4, l_discount@5, s_nationkey@6, ps_supplycost@7] - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_orderdate] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_orderdate] ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_quantity@2 as l_quantity, l_extendedprice@3 as l_extendedprice, l_discount@4 as l_discount, s_nationkey@5 as s_nationkey, ps_supplycost@0 as ps_supplycost] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(ps_suppkey@1, l_suppkey@2), (ps_partkey@0, l_partkey@1)], projection=[ps_supplycost@2, l_orderkey@3, l_quantity@6, l_extendedprice@7, l_discount@8, s_nationkey@9] - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/partsupp.parquet:0..41092331]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] + ParquetExec: file_groups={1 group: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet:0..41092331]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_partkey@2 as l_partkey, l_suppkey@3 as l_suppkey, l_quantity@4 as l_quantity, l_extendedprice@5 as l_extendedprice, l_discount@6 as l_discount, s_nationkey@0 as s_nationkey] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, l_suppkey@2)], projection=[s_nationkey@1, l_orderkey@2, l_partkey@3, l_suppkey@4, l_quantity@5, l_extendedprice@6, l_discount@7] - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, l_partkey@1)], projection=[l_orderkey@1, l_partkey@2, l_suppkey@3, l_quantity@4, l_extendedprice@5, l_discount@6] ProjectionExec: expr=[p_partkey@0 as p_partkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: p_name@1 LIKE %moccasin% - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_name], predicate=p_name@1 LIKE %moccasin% - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_partkey, l_suppkey, l_quantity, l_extendedprice, l_discount] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_name], predicate=p_name@1 LIKE %moccasin% + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_partkey, l_suppkey, l_quantity, l_extendedprice, 
l_discount] RaySQL Plan =========== @@ -64,24 +64,24 @@ SortExec: expr=[nation@0 ASC NULLS LAST,o_year@1 DESC], preserve_partitioning=[f ProjectionExec: expr=[n_name@0 as nation, date_part(YEAR, o_orderdate@5) as o_year, l_extendedprice@2 * (Some(1),20,0 - l_discount@3) - ps_supplycost@4 * l_quantity@1 as amount] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@3)], projection=[n_name@1, l_quantity@2, l_extendedprice@3, l_discount@4, ps_supplycost@6, o_orderdate@7] - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name] ProjectionExec: expr=[l_quantity@1 as l_quantity, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@4 as s_nationkey, ps_supplycost@5 as ps_supplycost, o_orderdate@0 as o_orderdate] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_orderdate@1, l_quantity@3, l_extendedprice@4, l_discount@5, s_nationkey@6, ps_supplycost@7] - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_orderdate] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_orderdate] ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_quantity@2 as l_quantity, l_extendedprice@3 as l_extendedprice, l_discount@4 as l_discount, s_nationkey@5 as s_nationkey, ps_supplycost@0 as ps_supplycost] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(ps_suppkey@1, l_suppkey@2), (ps_partkey@0, l_partkey@1)], projection=[ps_supplycost@2, l_orderkey@3, 
l_quantity@6, l_extendedprice@7, l_discount@8, s_nationkey@9] - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/partsupp.parquet:0..41092331]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet:0..41092331]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_partkey@2 as l_partkey, l_suppkey@3 as l_suppkey, l_quantity@4 as l_quantity, l_extendedprice@5 as l_extendedprice, l_discount@6 as l_discount, s_nationkey@0 as s_nationkey] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, l_suppkey@2)], projection=[s_nationkey@1, l_orderkey@2, l_partkey@3, l_suppkey@4, l_quantity@5, l_extendedprice@6, l_discount@7] - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] CoalesceBatchesExec: target_batch_size=8192 HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, l_partkey@1)], projection=[l_orderkey@1, l_partkey@2, l_suppkey@3, l_quantity@4, l_extendedprice@5, l_discount@6] ProjectionExec: expr=[p_partkey@0 as p_partkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: p_name@1 LIKE %moccasin% - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_name], predicate=p_name@1 LIKE %moccasin% - ParquetExec: file_groups={1 group: [[home/edmondo/Development/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_partkey, l_suppkey, l_quantity, l_extendedprice, l_discount] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, 
projection=[p_partkey, p_name], predicate=p_name@1 LIKE %moccasin% + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_partkey, l_suppkey, l_quantity, l_extendedprice, l_discount] From a74ed3743499beada21dac08fda31bc6c6ff3fc5 Mon Sep 17 00:00:00 2001 From: Edmondo Porcu Date: Thu, 3 Oct 2024 17:23:53 -0400 Subject: [PATCH 08/14] Using TPCH Dbgen from Databricks --- .github/workflows/rust.yml | 2 +- scripts/gen-test-data.sh | 72 ++++++-- testdata/expected-plans/q1.txt | 41 +++-- testdata/expected-plans/q10.txt | 137 ++++++++++----- testdata/expected-plans/q11.txt | 191 +++++++++++++++------ testdata/expected-plans/q12.txt | 71 +++++--- testdata/expected-plans/q13.txt | 79 ++++++--- testdata/expected-plans/q14.txt | 59 +++++-- testdata/expected-plans/q16.txt | 120 +++++++++---- testdata/expected-plans/q17.txt | 82 ++++++--- testdata/expected-plans/q18.txt | 106 +++++++++--- testdata/expected-plans/q19.txt | 61 ++++--- testdata/expected-plans/q2.txt | 292 +++++++++++++++++++++++--------- testdata/expected-plans/q20.txt | 155 ++++++++++++----- testdata/expected-plans/q21.txt | 196 ++++++++++++++------- testdata/expected-plans/q22.txt | 103 +++++++---- testdata/expected-plans/q3.txt | 113 ++++++++---- testdata/expected-plans/q4.txt | 83 ++++++--- testdata/expected-plans/q5.txt | 191 +++++++++++++++------ testdata/expected-plans/q6.txt | 26 ++- testdata/expected-plans/q7.txt | 196 +++++++++++++++------ testdata/expected-plans/q8.txt | 264 +++++++++++++++++++++-------- testdata/expected-plans/q9.txt | 181 +++++++++++++++----- 23 files changed, 2019 insertions(+), 802 deletions(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 3e59e61..7ac8057 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -7,7 +7,7 @@ on: env: CARGO_TERM_COLOR: always PYTHON_VERSION: 3.9 - TPCH_SAMPLING_RATE: "0.1" + TPCH_SCALING_FACTOR: "1" 
TPCH_TEST_PARTITIONS: "1" TPCH_DATA_PATH: "data" diff --git a/scripts/gen-test-data.sh b/scripts/gen-test-data.sh index 4c7560a..a46a01f 100755 --- a/scripts/gen-test-data.sh +++ b/scripts/gen-test-data.sh @@ -1,24 +1,60 @@ #!/bin/bash set -e -# Create necessary directories -mkdir -p data -mkdir -p test_files/tpch/data - -# Check if the data folder is empty -if [ -z "$(ls -A data)" ]; then - echo "Data folder is empty. Cloning repository..." - git clone https://github.com/databricks/tpch-dbgen.git tpch/tpch-dbgen + +create_directories() { + mkdir -p data +} + +clone_and_build_tpch_dbgen() { + if [ -z "$(ls -A tpch/tpch-dbgen)" ]; then + echo "tpch/tpch-dbgen folder is empty. Cloning repository..." + git clone https://github.com/databricks/tpch-dbgen.git tpch/tpch-dbgen + cd tpch/tpch-dbgen + make + cd ../../ + else + echo "tpch/tpch-dbgen folder is not empty. Skipping cloning of TPCH dbgen." + fi +} + +generate_data() { cd tpch/tpch-dbgen - make - # consistent with DataFusion test strategy - ./dbgen -f -s "$TPCH_SAMPLING_RATE" - pwd - ls - mv ./*.tbl ../../data + if [ "$TPCH_TEST_PARTITIONS" -gt 1 ]; then + for i in $(seq 1 "$TPCH_TEST_PARTITIONS"); do + ./dbgen -f -s "$TPCH_SCALING_FACTOR" -C "$TPCH_TEST_PARTITIONS" -S "$i" + done + else + ./dbgen -f -s "$TPCH_SCALING_FACTOR" + fi + mv ./*.tbl* ../../data +} + +convert_data() { cd ../../ - pwd python -m tpch.tpchgen convert --partitions "$TPCH_TEST_PARTITIONS" -else - echo "Data folder is not empty. Skipping cloning and data generation." -fi +} + +main() { + if [ -z "$TPCH_TEST_PARTITIONS" ]; then + echo "Error: TPCH_TEST_PARTITIONS is not set." + exit 1 + fi + + if [ -z "$TPCH_SCALING_FACTOR" ]; then + echo "Error: TPCH_SCALING_FACTOR is not set." + exit 1 + fi + + create_directories + + if [ -z "$(ls -A data)" ]; then + clone_and_build_tpch_dbgen + generate_data + convert_data + else + echo "Data folder is not empty. Skipping cloning and data generation." 
+ fi +} + +main diff --git a/testdata/expected-plans/q1.txt b/testdata/expected-plans/q1.txt index 1b9c6a5..d0b5599 100644 --- a/testdata/expected-plans/q1.txt +++ b/testdata/expected-plans/q1.txt @@ -11,23 +11,38 @@ Sort: lineitem.l_returnflag ASC NULLS LAST, lineitem.l_linestatus ASC NULLS LAST DataFusion Physical Plan ======================== -SortExec: expr=[l_returnflag@0 ASC NULLS LAST,l_linestatus@1 ASC NULLS LAST], preserve_partitioning=[false] - ProjectionExec: expr=[l_returnflag@0 as l_returnflag, l_linestatus@1 as l_linestatus, sum(lineitem.l_quantity)@2 as sum_qty, sum(lineitem.l_extendedprice)@3 as sum_base_price, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@4 as sum_disc_price, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax)@5 as sum_charge, avg(lineitem.l_quantity)@6 as avg_qty, avg(lineitem.l_extendedprice)@7 as avg_price, avg(lineitem.l_discount)@8 as avg_disc, count(*)@9 as count_order] - AggregateExec: mode=Single, gby=[l_returnflag@5 as l_returnflag, l_linestatus@6 as l_linestatus], aggr=[sum(lineitem.l_quantity), sum(lineitem.l_extendedprice), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax), avg(lineitem.l_quantity), avg(lineitem.l_extendedprice), avg(lineitem.l_discount), count(*)] - ProjectionExec: expr=[l_extendedprice@1 * (Some(1),20,0 - l_discount@2) as __common_expr_2, l_quantity@0 as l_quantity, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, l_tax@3 as l_tax, l_returnflag@4 as l_returnflag, l_linestatus@5 as l_linestatus] +SortPreservingMergeExec: [l_returnflag@0 ASC NULLS LAST,l_linestatus@1 ASC NULLS LAST] + SortExec: expr=[l_returnflag@0 ASC NULLS LAST,l_linestatus@1 ASC NULLS LAST], preserve_partitioning=[true] + ProjectionExec: expr=[l_returnflag@0 as l_returnflag, l_linestatus@1 as l_linestatus, sum(lineitem.l_quantity)@2 as sum_qty, 
sum(lineitem.l_extendedprice)@3 as sum_base_price, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@4 as sum_disc_price, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax)@5 as sum_charge, avg(lineitem.l_quantity)@6 as avg_qty, avg(lineitem.l_extendedprice)@7 as avg_price, avg(lineitem.l_discount)@8 as avg_disc, count(*)@9 as count_order] + AggregateExec: mode=FinalPartitioned, gby=[l_returnflag@0 as l_returnflag, l_linestatus@1 as l_linestatus], aggr=[sum(lineitem.l_quantity), sum(lineitem.l_extendedprice), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax), avg(lineitem.l_quantity), avg(lineitem.l_extendedprice), avg(lineitem.l_discount), count(*)] CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_shipdate@6 <= 1998-09-24 - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate], predicate=l_shipdate@10 <= 1998-09-24, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@0 <= 1998-09-24 END, required_guarantees=[] + RepartitionExec: partitioning=Hash([l_returnflag@0, l_linestatus@1], 4), input_partitions=4 + AggregateExec: mode=Partial, gby=[l_returnflag@5 as l_returnflag, l_linestatus@6 as l_linestatus], aggr=[sum(lineitem.l_quantity), sum(lineitem.l_extendedprice), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax), avg(lineitem.l_quantity), avg(lineitem.l_extendedprice), avg(lineitem.l_discount), count(*)] + ProjectionExec: expr=[l_extendedprice@1 * (Some(1),20,0 - l_discount@2) as __common_expr_2, l_quantity@0 as l_quantity, l_extendedprice@1 as l_extendedprice, l_discount@2 
as l_discount, l_tax@3 as l_tax, l_returnflag@4 as l_returnflag, l_linestatus@5 as l_linestatus] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: l_shipdate@6 <= 1998-09-24 + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate], predicate=l_shipdate@10 <= 1998-09-24, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@0 <= 1998-09-24 END, required_guarantees=[] RaySQL Plan =========== -Query Stage #0 (1 -> 1): -SortExec: expr=[l_returnflag@0 ASC NULLS LAST,l_linestatus@1 ASC NULLS LAST], preserve_partitioning=[false] - ProjectionExec: expr=[l_returnflag@0 as l_returnflag, l_linestatus@1 as l_linestatus, sum(lineitem.l_quantity)@2 as sum_qty, sum(lineitem.l_extendedprice)@3 as sum_base_price, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@4 as sum_disc_price, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax)@5 as sum_charge, avg(lineitem.l_quantity)@6 as avg_qty, avg(lineitem.l_extendedprice)@7 as avg_price, avg(lineitem.l_discount)@8 as avg_disc, count(*)@9 as count_order] - AggregateExec: mode=Single, gby=[l_returnflag@5 as l_returnflag, l_linestatus@6 as l_linestatus], aggr=[sum(lineitem.l_quantity), sum(lineitem.l_extendedprice), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * 
Int64(1) + lineitem.l_tax), avg(lineitem.l_quantity), avg(lineitem.l_extendedprice), avg(lineitem.l_discount), count(*)] - ProjectionExec: expr=[l_extendedprice@1 * (Some(1),20,0 - l_discount@2) as __common_expr_2, l_quantity@0 as l_quantity, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, l_tax@3 as l_tax, l_returnflag@4 as l_returnflag, l_linestatus@5 as l_linestatus] +Query Stage #0 (4 -> 4): +ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "l_returnflag", index: 0 }, Column { name: "l_linestatus", index: 1 }], 4)) + AggregateExec: mode=Partial, gby=[l_returnflag@5 as l_returnflag, l_linestatus@6 as l_linestatus], aggr=[sum(lineitem.l_quantity), sum(lineitem.l_extendedprice), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax), avg(lineitem.l_quantity), avg(lineitem.l_extendedprice), avg(lineitem.l_discount), count(*)] + ProjectionExec: expr=[l_extendedprice@1 * (Some(1),20,0 - l_discount@2) as __common_expr_2, l_quantity@0 as l_quantity, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, l_tax@3 as l_tax, l_returnflag@4 as l_returnflag, l_linestatus@5 as l_linestatus] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: l_shipdate@6 <= 1998-09-24 + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate], 
predicate=l_shipdate@10 <= 1998-09-24, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@0 <= 1998-09-24 END, required_guarantees=[] + +Query Stage #1 (4 -> 4): +ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "l_returnflag", index: 0 }, Column { name: "l_linestatus", index: 1 }], 4)) + SortExec: expr=[l_returnflag@0 ASC NULLS LAST,l_linestatus@1 ASC NULLS LAST], preserve_partitioning=[true] + ProjectionExec: expr=[l_returnflag@0 as l_returnflag, l_linestatus@1 as l_linestatus, sum(lineitem.l_quantity)@2 as sum_qty, sum(lineitem.l_extendedprice)@3 as sum_base_price, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@4 as sum_disc_price, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax)@5 as sum_charge, avg(lineitem.l_quantity)@6 as avg_qty, avg(lineitem.l_extendedprice)@7 as avg_price, avg(lineitem.l_discount)@8 as avg_disc, count(*)@9 as count_order] + AggregateExec: mode=FinalPartitioned, gby=[l_returnflag@0 as l_returnflag, l_linestatus@1 as l_linestatus], aggr=[sum(lineitem.l_quantity), sum(lineitem.l_extendedprice), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax), avg(lineitem.l_quantity), avg(lineitem.l_extendedprice), avg(lineitem.l_discount), count(*)] CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_shipdate@6 <= 1998-09-24 - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate], predicate=l_shipdate@10 <= 1998-09-24, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@0 <= 1998-09-24 END, required_guarantees=[] + ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: 
"l_returnflag", index: 0 }, Column { name: "l_linestatus", index: 1 }], 4)) + +Query Stage #2 (4 -> 1): +SortPreservingMergeExec: [l_returnflag@0 ASC NULLS LAST,l_linestatus@1 ASC NULLS LAST] + ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "l_returnflag", index: 0 }, Column { name: "l_linestatus", index: 1 }], 4)) diff --git a/testdata/expected-plans/q10.txt b/testdata/expected-plans/q10.txt index b6e9f3f..08ebdfc 100644 --- a/testdata/expected-plans/q10.txt +++ b/testdata/expected-plans/q10.txt @@ -23,49 +23,108 @@ Limit: skip=0, fetch=20 DataFusion Physical Plan ======================== -SortExec: TopK(fetch=20), expr=[revenue@2 DESC], preserve_partitioning=[false] - ProjectionExec: expr=[c_custkey@0 as c_custkey, c_name@1 as c_name, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@7 as revenue, c_acctbal@2 as c_acctbal, n_name@4 as n_name, c_address@5 as c_address, c_phone@3 as c_phone, c_comment@6 as c_comment] - AggregateExec: mode=Single, gby=[c_custkey@0 as c_custkey, c_name@1 as c_name, c_acctbal@4 as c_acctbal, c_phone@3 as c_phone, n_name@8 as n_name, c_address@2 as c_address, c_comment@5 as c_comment], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] - ProjectionExec: expr=[c_custkey@1 as c_custkey, c_name@2 as c_name, c_address@3 as c_address, c_phone@4 as c_phone, c_acctbal@5 as c_acctbal, c_comment@6 as c_comment, l_extendedprice@7 as l_extendedprice, l_discount@8 as l_discount, n_name@0 as n_name] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, c_nationkey@3)], projection=[n_name@1, c_custkey@2, c_name@3, c_address@4, c_phone@6, c_acctbal@7, c_comment@8, l_extendedprice@9, l_discount@10] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, 
on=[(o_orderkey@7, l_orderkey@0)], projection=[c_custkey@0, c_name@1, c_address@2, c_nationkey@3, c_phone@4, c_acctbal@5, c_comment@6, l_extendedprice@9, l_discount@10] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, c_name@1, c_address@2, c_nationkey@3, c_phone@4, c_acctbal@5, c_comment@6, o_orderkey@7] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13212491]]}, projection=[c_custkey, c_name, c_address, c_nationkey, c_phone, c_acctbal, c_comment] - ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: o_orderdate@2 >= 1993-07-01 AND o_orderdate@2 < 1993-10-01 - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1993-07-01 AND o_orderdate@4 < 1993-10-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1993-07-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1993-10-01 END, required_guarantees=[] - ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] +GlobalLimitExec: skip=0, fetch=20 + SortPreservingMergeExec: [revenue@2 DESC], fetch=20 + SortExec: TopK(fetch=20), expr=[revenue@2 DESC], preserve_partitioning=[true] + ProjectionExec: expr=[c_custkey@0 as c_custkey, c_name@1 as c_name, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@7 as revenue, c_acctbal@2 as c_acctbal, n_name@4 as n_name, c_address@5 as c_address, c_phone@3 as c_phone, c_comment@6 as c_comment] + AggregateExec: mode=FinalPartitioned, gby=[c_custkey@0 as c_custkey, c_name@1 as c_name, c_acctbal@2 as 
c_acctbal, c_phone@3 as c_phone, n_name@4 as n_name, c_address@5 as c_address, c_comment@6 as c_comment], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([c_custkey@0, c_name@1, c_acctbal@2, c_phone@3, n_name@4, c_address@5, c_comment@6], 4), input_partitions=4 + AggregateExec: mode=Partial, gby=[c_custkey@0 as c_custkey, c_name@1 as c_name, c_acctbal@4 as c_acctbal, c_phone@3 as c_phone, n_name@8 as n_name, c_address@2 as c_address, c_comment@5 as c_comment], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] + ProjectionExec: expr=[c_custkey@1 as c_custkey, c_name@2 as c_name, c_address@3 as c_address, c_phone@4 as c_phone, c_acctbal@5 as c_acctbal, c_comment@6 as c_comment, l_extendedprice@7 as l_extendedprice, l_discount@8 as l_discount, n_name@0 as n_name] CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_returnflag@3 = R - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_extendedprice, l_discount, l_returnflag], predicate=l_returnflag@8 = R, pruning_predicate=CASE WHEN l_returnflag_null_count@2 = l_returnflag_row_count@3 THEN false ELSE l_returnflag_min@0 <= R AND R <= l_returnflag_max@1 END, required_guarantees=[l_returnflag in (R)] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, c_nationkey@3)], projection=[n_name@1, c_custkey@2, c_name@3, c_address@4, c_phone@6, c_acctbal@7, c_comment@8, l_extendedprice@9, l_discount@10] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=1 + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([c_nationkey@3], 4), 
input_partitions=4 + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@7, l_orderkey@0)], projection=[c_custkey@0, c_name@1, c_address@2, c_nationkey@3, c_phone@4, c_acctbal@5, c_comment@6, l_extendedprice@9, l_discount@10] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([o_orderkey@7], 4), input_partitions=4 + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, c_name@1, c_address@2, c_nationkey@3, c_phone@4, c_acctbal@5, c_comment@6, o_orderkey@7] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([c_custkey@0], 4), input_partitions=4 + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..3390424], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:3390424..6780848], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6780848..6782476, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..3388796], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:3388796..6779220]]}, projection=[c_custkey, c_name, c_address, c_nationkey, c_phone, c_acctbal, c_comment] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([o_custkey@1], 4), input_partitions=4 + ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: o_orderdate@2 >= 1993-07-01 AND o_orderdate@2 < 1993-10-01 + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13634779], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13634779..27269558], 
[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27269558..27275584, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13628753], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13628753..27263530]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1993-07-01 AND o_orderdate@4 < 1993-10-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1993-07-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1993-10-01 END, required_guarantees=[] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4 + ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: l_returnflag@3 = R + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_orderkey, l_extendedprice, l_discount, l_returnflag], predicate=l_returnflag@8 = R, pruning_predicate=CASE WHEN l_returnflag_null_count@2 = l_returnflag_row_count@3 THEN false ELSE l_returnflag_min@0 <= R AND R <= l_returnflag_max@1 END, required_guarantees=[l_returnflag in (R)] RaySQL Plan =========== -Query Stage #0 (1 -> 1): -SortExec: TopK(fetch=20), expr=[revenue@2 DESC], preserve_partitioning=[false] - ProjectionExec: 
expr=[c_custkey@0 as c_custkey, c_name@1 as c_name, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@7 as revenue, c_acctbal@2 as c_acctbal, n_name@4 as n_name, c_address@5 as c_address, c_phone@3 as c_phone, c_comment@6 as c_comment] - AggregateExec: mode=Single, gby=[c_custkey@0 as c_custkey, c_name@1 as c_name, c_acctbal@4 as c_acctbal, c_phone@3 as c_phone, n_name@8 as n_name, c_address@2 as c_address, c_comment@5 as c_comment], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] - ProjectionExec: expr=[c_custkey@1 as c_custkey, c_name@2 as c_name, c_address@3 as c_address, c_phone@4 as c_phone, c_acctbal@5 as c_acctbal, c_comment@6 as c_comment, l_extendedprice@7 as l_extendedprice, l_discount@8 as l_discount, n_name@0 as n_name] +Query Stage #0 (1 -> 4): +ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name] + +Query Stage #1 (4 -> 4): +ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..3390424], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:3390424..6780848], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6780848..6782476, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..3388796], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:3388796..6779220]]}, projection=[c_custkey, c_name, c_address, c_nationkey, c_phone, c_acctbal, c_comment] + +Query Stage #2 (4 -> 4): +ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "o_custkey", index: 1 }], 4)) + ProjectionExec: expr=[o_orderkey@0 as 
o_orderkey, o_custkey@1 as o_custkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: o_orderdate@2 >= 1993-07-01 AND o_orderdate@2 < 1993-10-01 + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13634779], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13634779..27269558], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27269558..27275584, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13628753], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13628753..27263530]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1993-07-01 AND o_orderdate@4 < 1993-10-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1993-07-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1993-10-01 END, required_guarantees=[] + +Query Stage #3 (4 -> 4): +ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "o_orderkey", index: 7 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, c_name@1, c_address@2, c_nationkey@3, c_phone@4, c_acctbal@5, c_comment@6, o_orderkey@7] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "o_custkey", index: 1 }], 4)) + +Query Stage #4 (4 -> 4): +ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) + ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as 
l_discount] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: l_returnflag@3 = R + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_orderkey, l_extendedprice, l_discount, l_returnflag], predicate=l_returnflag@8 = R, pruning_predicate=CASE WHEN l_returnflag_null_count@2 = l_returnflag_row_count@3 THEN false ELSE l_returnflag_min@0 <= R AND R <= l_returnflag_max@1 END, required_guarantees=[l_returnflag in (R)] + +Query Stage #5 (4 -> 4): +ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "c_nationkey", index: 3 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@7, l_orderkey@0)], projection=[c_custkey@0, c_name@1, c_address@2, c_nationkey@3, c_phone@4, c_acctbal@5, c_comment@6, l_extendedprice@9, l_discount@10] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "o_orderkey", index: 7 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=4, input_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) + +Query Stage #6 (4 -> 4): +ShuffleWriterExec(stage_id=6, output_partitioning=Hash([Column { name: "c_custkey", index: 0 }, Column { name: "c_name", index: 1 }, Column { name: "c_acctbal", index: 2 }, Column { name: "c_phone", index: 3 }, Column { name: "n_name", index: 4 }, Column { name: "c_address", index: 5 }, Column { name: "c_comment", index: 6 }], 4)) + 
AggregateExec: mode=Partial, gby=[c_custkey@0 as c_custkey, c_name@1 as c_name, c_acctbal@4 as c_acctbal, c_phone@3 as c_phone, n_name@8 as n_name, c_address@2 as c_address, c_comment@5 as c_comment], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] + ProjectionExec: expr=[c_custkey@1 as c_custkey, c_name@2 as c_name, c_address@3 as c_address, c_phone@4 as c_phone, c_acctbal@5 as c_acctbal, c_comment@6 as c_comment, l_extendedprice@7 as l_extendedprice, l_discount@8 as l_discount, n_name@0 as n_name] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, c_nationkey@3)], projection=[n_name@1, c_custkey@2, c_name@3, c_address@4, c_phone@6, c_acctbal@7, c_comment@8, l_extendedprice@9, l_discount@10] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=5, input_partitioning=Hash([Column { name: "c_nationkey", index: 3 }], 4)) + +Query Stage #7 (4 -> 4): +ShuffleWriterExec(stage_id=7, output_partitioning=Hash([Column { name: "c_custkey", index: 0 }, Column { name: "c_name", index: 1 }, Column { name: "c_acctbal", index: 3 }, Column { name: "c_phone", index: 6 }, Column { name: "n_name", index: 4 }, Column { name: "c_address", index: 5 }, Column { name: "c_comment", index: 7 }], 4)) + SortExec: TopK(fetch=20), expr=[revenue@2 DESC], preserve_partitioning=[true] + ProjectionExec: expr=[c_custkey@0 as c_custkey, c_name@1 as c_name, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@7 as revenue, c_acctbal@2 as c_acctbal, n_name@4 as n_name, c_address@5 as c_address, c_phone@3 as c_phone, c_comment@6 as c_comment] + AggregateExec: mode=FinalPartitioned, gby=[c_custkey@0 as c_custkey, c_name@1 as c_name, c_acctbal@2 as c_acctbal, c_phone@3 as c_phone, n_name@4 as n_name, c_address@5 as c_address, c_comment@6 as 
c_comment], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, c_nationkey@3)], projection=[n_name@1, c_custkey@2, c_name@3, c_address@4, c_phone@6, c_acctbal@7, c_comment@8, l_extendedprice@9, l_discount@10] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(o_orderkey@7, l_orderkey@0)], projection=[c_custkey@0, c_name@1, c_address@2, c_nationkey@3, c_phone@4, c_acctbal@5, c_comment@6, l_extendedprice@9, l_discount@10] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, c_name@1, c_address@2, c_nationkey@3, c_phone@4, c_acctbal@5, c_comment@6, o_orderkey@7] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13212491]]}, projection=[c_custkey, c_name, c_address, c_nationkey, c_phone, c_acctbal, c_comment] - ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: o_orderdate@2 >= 1993-07-01 AND o_orderdate@2 < 1993-10-01 - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1993-07-01 AND o_orderdate@4 < 1993-10-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1993-07-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1993-10-01 END, required_guarantees=[] - ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_extendedprice@1 as 
l_extendedprice, l_discount@2 as l_discount] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_returnflag@3 = R - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_extendedprice, l_discount, l_returnflag], predicate=l_returnflag@8 = R, pruning_predicate=CASE WHEN l_returnflag_null_count@2 = l_returnflag_row_count@3 THEN false ELSE l_returnflag_min@0 <= R AND R <= l_returnflag_max@1 END, required_guarantees=[l_returnflag in (R)] + ShuffleReaderExec(stage_id=6, input_partitioning=Hash([Column { name: "c_custkey", index: 0 }, Column { name: "c_name", index: 1 }, Column { name: "c_acctbal", index: 2 }, Column { name: "c_phone", index: 3 }, Column { name: "n_name", index: 4 }, Column { name: "c_address", index: 5 }, Column { name: "c_comment", index: 6 }], 4)) + +Query Stage #8 (1 -> 1): +GlobalLimitExec: skip=0, fetch=20 + SortPreservingMergeExec: [revenue@2 DESC], fetch=20 + ShuffleReaderExec(stage_id=7, input_partitioning=Hash([Column { name: "c_custkey", index: 0 }, Column { name: "c_name", index: 1 }, Column { name: "c_acctbal", index: 3 }, Column { name: "c_phone", index: 6 }, Column { name: "n_name", index: 4 }, Column { name: "c_address", index: 5 }, Column { name: "c_comment", index: 7 }], 4)) diff --git a/testdata/expected-plans/q11.txt b/testdata/expected-plans/q11.txt index 5b6c7ae..b32d75a 100644 --- a/testdata/expected-plans/q11.txt +++ b/testdata/expected-plans/q11.txt @@ -30,65 +30,148 @@ Sort: value DESC NULLS FIRST DataFusion Physical Plan ======================== -SortExec: expr=[value@1 DESC], preserve_partitioning=[false] - ProjectionExec: expr=[ps_partkey@1 as ps_partkey, sum(partsupp.ps_supplycost * partsupp.ps_availqty)@2 as value] - NestedLoopJoinExec: join_type=Inner, filter=CAST(sum(partsupp.ps_supplycost * partsupp.ps_availqty)@0 AS Decimal128(38, 15)) > sum(partsupp.ps_supplycost * partsupp.ps_availqty) * Float64(0.0001)@1 - 
ProjectionExec: expr=[CAST(CAST(sum(partsupp.ps_supplycost * partsupp.ps_availqty)@0 AS Float64) * 0.0001 AS Decimal128(38, 15)) as sum(partsupp.ps_supplycost * partsupp.ps_availqty) * Float64(0.0001)] - AggregateExec: mode=Single, gby=[], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[ps_availqty@1, ps_supplycost@2] - ProjectionExec: expr=[n_nationkey@0 as n_nationkey] +SortPreservingMergeExec: [value@1 DESC] + SortExec: expr=[value@1 DESC], preserve_partitioning=[true] + ProjectionExec: expr=[ps_partkey@1 as ps_partkey, sum(partsupp.ps_supplycost * partsupp.ps_availqty)@2 as value] + NestedLoopJoinExec: join_type=Inner, filter=CAST(sum(partsupp.ps_supplycost * partsupp.ps_availqty)@0 AS Decimal128(38, 15)) > sum(partsupp.ps_supplycost * partsupp.ps_availqty) * Float64(0.0001)@1 + ProjectionExec: expr=[CAST(CAST(sum(partsupp.ps_supplycost * partsupp.ps_availqty)@0 AS Float64) * 0.0001 AS Decimal128(38, 15)) as sum(partsupp.ps_supplycost * partsupp.ps_availqty) * Float64(0.0001)] + AggregateExec: mode=Final, gby=[], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] + CoalescePartitionsExec + AggregateExec: mode=Partial, gby=[], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] CoalesceBatchesExec: target_batch_size=8192 - FilterExec: n_name@1 = ALGERIA - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ALGERIA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ALGERIA AND ALGERIA <= n_name_max@1 END, required_guarantees=[n_name in (ALGERIA)] - ProjectionExec: expr=[ps_availqty@1 as ps_availqty, ps_supplycost@2 as ps_supplycost, s_nationkey@0 as s_nationkey] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, 
s_nationkey@2)], projection=[ps_availqty@1, ps_supplycost@2] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=4 + RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 + ProjectionExec: expr=[n_nationkey@0 as n_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: n_name@1 = ALGERIA + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ALGERIA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ALGERIA AND ALGERIA <= n_name_max@1 END, required_guarantees=[n_name in (ALGERIA)] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([s_nationkey@2], 4), input_partitions=4 + ProjectionExec: expr=[ps_availqty@1 as ps_availqty, ps_supplycost@2 as ps_supplycost, s_nationkey@0 as s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@0)], projection=[s_nationkey@1, ps_availqty@3, ps_supplycost@4] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=4 + RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([ps_suppkey@0], 4), input_partitions=4 + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10445614], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:10445614..20890506, 
home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..722], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:722..10446336], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:10446336..20891947]]}, projection=[ps_suppkey, ps_availqty, ps_supplycost] + AggregateExec: mode=FinalPartitioned, gby=[ps_partkey@0 as ps_partkey], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([ps_partkey@0], 4), input_partitions=4 + AggregateExec: mode=Partial, gby=[ps_partkey@0 as ps_partkey], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@0)], projection=[s_nationkey@1, ps_availqty@3, ps_supplycost@4] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet:0..41092331]]}, projection=[ps_suppkey, ps_availqty, ps_supplycost] - AggregateExec: mode=Single, gby=[ps_partkey@0 as ps_partkey], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@3)], projection=[ps_partkey@1, ps_availqty@2, ps_supplycost@3] - ProjectionExec: expr=[n_nationkey@0 as n_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: n_name@1 = ALGERIA - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ALGERIA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ALGERIA AND ALGERIA <= n_name_max@1 
END, required_guarantees=[n_name in (ALGERIA)] - ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_availqty@2 as ps_availqty, ps_supplycost@3 as ps_supplycost, s_nationkey@0 as s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@1)], projection=[s_nationkey@1, ps_partkey@2, ps_availqty@4, ps_supplycost@5] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet:0..41092331]]}, projection=[ps_partkey, ps_suppkey, ps_availqty, ps_supplycost] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@3)], projection=[ps_partkey@1, ps_availqty@2, ps_supplycost@3] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=4 + RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 + ProjectionExec: expr=[n_nationkey@0 as n_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: n_name@1 = ALGERIA + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ALGERIA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ALGERIA AND ALGERIA <= n_name_max@1 END, required_guarantees=[n_name in (ALGERIA)] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([s_nationkey@3], 4), input_partitions=4 + ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_availqty@2 as ps_availqty, ps_supplycost@3 as ps_supplycost, s_nationkey@0 as s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@1)], 
projection=[s_nationkey@1, ps_partkey@2, ps_availqty@4, ps_supplycost@5] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=4 + RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([ps_suppkey@1], 4), input_partitions=4 + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10445614], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:10445614..20890506, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..722], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:722..10446336], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:10446336..20891947]]}, projection=[ps_partkey, ps_suppkey, ps_availqty, ps_supplycost] RaySQL Plan =========== -Query Stage #0 (1 -> 1): -SortExec: expr=[value@1 DESC], preserve_partitioning=[false] - ProjectionExec: expr=[ps_partkey@1 as ps_partkey, sum(partsupp.ps_supplycost * partsupp.ps_availqty)@2 as value] - NestedLoopJoinExec: join_type=Inner, filter=CAST(sum(partsupp.ps_supplycost * partsupp.ps_availqty)@0 AS Decimal128(38, 15)) > sum(partsupp.ps_supplycost * partsupp.ps_availqty) * Float64(0.0001)@1 - ProjectionExec: expr=[CAST(CAST(sum(partsupp.ps_supplycost * partsupp.ps_availqty)@0 AS Float64) * 0.0001 AS Decimal128(38, 15)) as sum(partsupp.ps_supplycost * partsupp.ps_availqty) * Float64(0.0001)] - AggregateExec: mode=Single, gby=[], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] - CoalesceBatchesExec: 
target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[ps_availqty@1, ps_supplycost@2] - ProjectionExec: expr=[n_nationkey@0 as n_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: n_name@1 = ALGERIA - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ALGERIA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ALGERIA AND ALGERIA <= n_name_max@1 END, required_guarantees=[n_name in (ALGERIA)] - ProjectionExec: expr=[ps_availqty@1 as ps_availqty, ps_supplycost@2 as ps_supplycost, s_nationkey@0 as s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@0)], projection=[s_nationkey@1, ps_availqty@3, ps_supplycost@4] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet:0..41092331]]}, projection=[ps_suppkey, ps_availqty, ps_supplycost] - AggregateExec: mode=Single, gby=[ps_partkey@0 as ps_partkey], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] +Query Stage #0 (1 -> 4): +ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) + ProjectionExec: expr=[n_nationkey@0 as n_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: n_name@1 = ALGERIA + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ALGERIA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ALGERIA AND ALGERIA <= n_name_max@1 END, 
required_guarantees=[n_name in (ALGERIA)] + +Query Stage #1 (2 -> 4): +ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] + +Query Stage #2 (4 -> 4): +ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "ps_suppkey", index: 0 }], 4)) + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10445614], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:10445614..20890506, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..722], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:722..10446336], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:10446336..20891947]]}, projection=[ps_suppkey, ps_availqty, ps_supplycost] + +Query Stage #3 (4 -> 4): +ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "s_nationkey", index: 2 }], 4)) + ProjectionExec: expr=[ps_availqty@1 as ps_availqty, ps_supplycost@2 as ps_supplycost, s_nationkey@0 as s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@0)], projection=[s_nationkey@1, ps_availqty@3, ps_supplycost@4] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "ps_suppkey", index: 0 }], 4)) + +Query Stage #4 (4 -> 1): +ShuffleWriterExec(stage_id=4, output_partitioning=Hash([], 4)) + AggregateExec: 
mode=Partial, gby=[], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[ps_availqty@1, ps_supplycost@2] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@3)], projection=[ps_partkey@1, ps_availqty@2, ps_supplycost@3] - ProjectionExec: expr=[n_nationkey@0 as n_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: n_name@1 = ALGERIA - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ALGERIA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ALGERIA AND ALGERIA <= n_name_max@1 END, required_guarantees=[n_name in (ALGERIA)] - ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_availqty@2 as ps_availqty, ps_supplycost@3 as ps_supplycost, s_nationkey@0 as s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@1)], projection=[s_nationkey@1, ps_partkey@2, ps_availqty@4, ps_supplycost@5] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet:0..41092331]]}, projection=[ps_partkey, ps_suppkey, ps_availqty, ps_supplycost] + ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "s_nationkey", index: 2 }], 4)) + +Query Stage #5 (1 -> 4): +ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "n_nationkey", 
index: 0 }], 4)) + ProjectionExec: expr=[n_nationkey@0 as n_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: n_name@1 = ALGERIA + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ALGERIA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ALGERIA AND ALGERIA <= n_name_max@1 END, required_guarantees=[n_name in (ALGERIA)] + +Query Stage #6 (2 -> 4): +ShuffleWriterExec(stage_id=6, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] + +Query Stage #7 (4 -> 4): +ShuffleWriterExec(stage_id=7, output_partitioning=Hash([Column { name: "ps_suppkey", index: 1 }], 4)) + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10445614], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:10445614..20890506, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..722], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:722..10446336], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:10446336..20891947]]}, projection=[ps_partkey, ps_suppkey, ps_availqty, ps_supplycost] + +Query Stage #8 (4 -> 4): +ShuffleWriterExec(stage_id=8, output_partitioning=Hash([Column { name: "s_nationkey", index: 3 }], 4)) + ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_availqty@2 as ps_availqty, ps_supplycost@3 as ps_supplycost, s_nationkey@0 as s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, 
join_type=Inner, on=[(s_suppkey@0, ps_suppkey@1)], projection=[s_nationkey@1, ps_partkey@2, ps_availqty@4, ps_supplycost@5] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=6, input_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=7, input_partitioning=Hash([Column { name: "ps_suppkey", index: 1 }], 4)) + +Query Stage #9 (4 -> 4): +ShuffleWriterExec(stage_id=9, output_partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 4)) + AggregateExec: mode=Partial, gby=[ps_partkey@0 as ps_partkey], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@3)], projection=[ps_partkey@1, ps_availqty@2, ps_supplycost@3] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=5, input_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=8, input_partitioning=Hash([Column { name: "s_nationkey", index: 3 }], 4)) + +Query Stage #10 (4 -> 4): +ShuffleWriterExec(stage_id=10, output_partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 4)) + SortExec: expr=[value@1 DESC], preserve_partitioning=[true] + ProjectionExec: expr=[ps_partkey@1 as ps_partkey, sum(partsupp.ps_supplycost * partsupp.ps_availqty)@2 as value] + NestedLoopJoinExec: join_type=Inner, filter=CAST(sum(partsupp.ps_supplycost * partsupp.ps_availqty)@0 AS Decimal128(38, 15)) > sum(partsupp.ps_supplycost * partsupp.ps_availqty) * Float64(0.0001)@1 + ProjectionExec: expr=[CAST(CAST(sum(partsupp.ps_supplycost * partsupp.ps_availqty)@0 AS Float64) * 0.0001 AS Decimal128(38, 15)) as sum(partsupp.ps_supplycost * partsupp.ps_availqty) * Float64(0.0001)] + AggregateExec: mode=Final, gby=[], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] + 
CoalescePartitionsExec + ShuffleReaderExec(stage_id=4, input_partitioning=Hash([], 4)) + AggregateExec: mode=FinalPartitioned, gby=[ps_partkey@0 as ps_partkey], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=9, input_partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 4)) + +Query Stage #11 (4 -> 1): +SortPreservingMergeExec: [value@1 DESC] + ShuffleReaderExec(stage_id=10, input_partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 4)) diff --git a/testdata/expected-plans/q12.txt b/testdata/expected-plans/q12.txt index e940bd5..3b1a165 100644 --- a/testdata/expected-plans/q12.txt +++ b/testdata/expected-plans/q12.txt @@ -14,31 +14,60 @@ Sort: lineitem.l_shipmode ASC NULLS LAST DataFusion Physical Plan ======================== -SortExec: expr=[l_shipmode@0 ASC NULLS LAST], preserve_partitioning=[false] - ProjectionExec: expr=[l_shipmode@0 as l_shipmode, sum(CASE WHEN orders.o_orderpriority = Utf8("1-URGENT") OR orders.o_orderpriority = Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END)@1 as high_line_count, sum(CASE WHEN orders.o_orderpriority != Utf8("1-URGENT") AND orders.o_orderpriority != Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END)@2 as low_line_count] - AggregateExec: mode=Single, gby=[l_shipmode@1 as l_shipmode], aggr=[sum(CASE WHEN orders.o_orderpriority = Utf8("1-URGENT") OR orders.o_orderpriority = Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END), sum(CASE WHEN orders.o_orderpriority != Utf8("1-URGENT") AND orders.o_orderpriority != Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END)] - ProjectionExec: expr=[o_orderpriority@1 as o_orderpriority, l_shipmode@0 as l_shipmode] +SortPreservingMergeExec: [l_shipmode@0 ASC NULLS LAST] + SortExec: expr=[l_shipmode@0 ASC NULLS LAST], preserve_partitioning=[true] + ProjectionExec: expr=[l_shipmode@0 as l_shipmode, sum(CASE WHEN orders.o_orderpriority = Utf8("1-URGENT") OR orders.o_orderpriority = Utf8("2-HIGH") THEN 
Int64(1) ELSE Int64(0) END)@1 as high_line_count, sum(CASE WHEN orders.o_orderpriority != Utf8("1-URGENT") AND orders.o_orderpriority != Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END)@2 as low_line_count] + AggregateExec: mode=FinalPartitioned, gby=[l_shipmode@0 as l_shipmode], aggr=[sum(CASE WHEN orders.o_orderpriority = Utf8("1-URGENT") OR orders.o_orderpriority = Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END), sum(CASE WHEN orders.o_orderpriority != Utf8("1-URGENT") AND orders.o_orderpriority != Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END)] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(l_orderkey@0, o_orderkey@0)], projection=[l_shipmode@1, o_orderpriority@3] - ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_shipmode@4 as l_shipmode] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: (l_shipmode@4 = FOB OR l_shipmode@4 = SHIP) AND l_receiptdate@3 > l_commitdate@2 AND l_shipdate@1 < l_commitdate@2 AND l_receiptdate@3 >= 1995-01-01 AND l_receiptdate@3 < 1996-01-01 - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_shipdate, l_commitdate, l_receiptdate, l_shipmode], predicate=(l_shipmode@14 = FOB OR l_shipmode@14 = SHIP) AND l_receiptdate@12 > l_commitdate@11 AND l_shipdate@10 < l_commitdate@11 AND l_receiptdate@12 >= 1995-01-01 AND l_receiptdate@12 < 1996-01-01, pruning_predicate=(CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= FOB AND FOB <= l_shipmode_max@1 END OR CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= SHIP AND SHIP <= l_shipmode_max@1 END) AND CASE WHEN l_receiptdate_null_count@5 = l_receiptdate_row_count@6 THEN false ELSE l_receiptdate_max@4 >= 1995-01-01 END AND CASE WHEN l_receiptdate_null_count@5 = l_receiptdate_row_count@6 THEN false ELSE l_receiptdate_min@7 < 1996-01-01 END, 
required_guarantees=[l_shipmode in (SHIP, FOB)] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_orderpriority] + RepartitionExec: partitioning=Hash([l_shipmode@0], 4), input_partitions=4 + AggregateExec: mode=Partial, gby=[l_shipmode@1 as l_shipmode], aggr=[sum(CASE WHEN orders.o_orderpriority = Utf8("1-URGENT") OR orders.o_orderpriority = Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END), sum(CASE WHEN orders.o_orderpriority != Utf8("1-URGENT") AND orders.o_orderpriority != Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END)] + ProjectionExec: expr=[o_orderpriority@1 as o_orderpriority, l_shipmode@0 as l_shipmode] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(l_orderkey@0, o_orderkey@0)], projection=[l_shipmode@1, o_orderpriority@3] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4 + ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_shipmode@4 as l_shipmode] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: (l_shipmode@4 = FOB OR l_shipmode@4 = SHIP) AND l_receiptdate@3 > l_commitdate@2 AND l_shipdate@1 < l_commitdate@2 AND l_receiptdate@3 >= 1995-01-01 AND l_receiptdate@3 < 1996-01-01 + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_orderkey, l_shipdate, l_commitdate, l_receiptdate, l_shipmode], predicate=(l_shipmode@14 = FOB OR 
l_shipmode@14 = SHIP) AND l_receiptdate@12 > l_commitdate@11 AND l_shipdate@10 < l_commitdate@11 AND l_receiptdate@12 >= 1995-01-01 AND l_receiptdate@12 < 1996-01-01, pruning_predicate=(CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= FOB AND FOB <= l_shipmode_max@1 END OR CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= SHIP AND SHIP <= l_shipmode_max@1 END) AND CASE WHEN l_receiptdate_null_count@5 = l_receiptdate_row_count@6 THEN false ELSE l_receiptdate_max@4 >= 1995-01-01 END AND CASE WHEN l_receiptdate_null_count@5 = l_receiptdate_row_count@6 THEN false ELSE l_receiptdate_min@7 < 1996-01-01 END, required_guarantees=[l_shipmode in (SHIP, FOB)] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([o_orderkey@0], 4), input_partitions=4 + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13634779], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13634779..27269558], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27269558..27275584, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13628753], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13628753..27263530]]}, projection=[o_orderkey, o_orderpriority] RaySQL Plan =========== -Query Stage #0 (1 -> 1): -SortExec: expr=[l_shipmode@0 ASC NULLS LAST], preserve_partitioning=[false] - ProjectionExec: expr=[l_shipmode@0 as l_shipmode, sum(CASE WHEN orders.o_orderpriority = Utf8("1-URGENT") OR orders.o_orderpriority = Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END)@1 as high_line_count, sum(CASE WHEN orders.o_orderpriority != Utf8("1-URGENT") AND orders.o_orderpriority != Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END)@2 as low_line_count] - AggregateExec: mode=Single, gby=[l_shipmode@1 as 
l_shipmode], aggr=[sum(CASE WHEN orders.o_orderpriority = Utf8("1-URGENT") OR orders.o_orderpriority = Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END), sum(CASE WHEN orders.o_orderpriority != Utf8("1-URGENT") AND orders.o_orderpriority != Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END)] - ProjectionExec: expr=[o_orderpriority@1 as o_orderpriority, l_shipmode@0 as l_shipmode] +Query Stage #0 (4 -> 4): +ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) + ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_shipmode@4 as l_shipmode] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: (l_shipmode@4 = FOB OR l_shipmode@4 = SHIP) AND l_receiptdate@3 > l_commitdate@2 AND l_shipdate@1 < l_commitdate@2 AND l_receiptdate@3 >= 1995-01-01 AND l_receiptdate@3 < 1996-01-01 + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_orderkey, l_shipdate, l_commitdate, l_receiptdate, l_shipmode], predicate=(l_shipmode@14 = FOB OR l_shipmode@14 = SHIP) AND l_receiptdate@12 > l_commitdate@11 AND l_shipdate@10 < l_commitdate@11 AND l_receiptdate@12 >= 1995-01-01 AND l_receiptdate@12 < 1996-01-01, pruning_predicate=(CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= FOB AND FOB <= l_shipmode_max@1 END OR CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= SHIP AND SHIP <= l_shipmode_max@1 END) AND CASE WHEN l_receiptdate_null_count@5 = 
l_receiptdate_row_count@6 THEN false ELSE l_receiptdate_max@4 >= 1995-01-01 END AND CASE WHEN l_receiptdate_null_count@5 = l_receiptdate_row_count@6 THEN false ELSE l_receiptdate_min@7 < 1996-01-01 END, required_guarantees=[l_shipmode in (SHIP, FOB)] + +Query Stage #1 (4 -> 4): +ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13634779], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13634779..27269558], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27269558..27275584, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13628753], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13628753..27263530]]}, projection=[o_orderkey, o_orderpriority] + +Query Stage #2 (4 -> 4): +ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "l_shipmode", index: 0 }], 4)) + AggregateExec: mode=Partial, gby=[l_shipmode@1 as l_shipmode], aggr=[sum(CASE WHEN orders.o_orderpriority = Utf8("1-URGENT") OR orders.o_orderpriority = Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END), sum(CASE WHEN orders.o_orderpriority != Utf8("1-URGENT") AND orders.o_orderpriority != Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END)] + ProjectionExec: expr=[o_orderpriority@1 as o_orderpriority, l_shipmode@0 as l_shipmode] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(l_orderkey@0, o_orderkey@0)], projection=[l_shipmode@1, o_orderpriority@3] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) 
+ +Query Stage #3 (4 -> 4): +ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "l_shipmode", index: 0 }], 4)) + SortExec: expr=[l_shipmode@0 ASC NULLS LAST], preserve_partitioning=[true] + ProjectionExec: expr=[l_shipmode@0 as l_shipmode, sum(CASE WHEN orders.o_orderpriority = Utf8("1-URGENT") OR orders.o_orderpriority = Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END)@1 as high_line_count, sum(CASE WHEN orders.o_orderpriority != Utf8("1-URGENT") AND orders.o_orderpriority != Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END)@2 as low_line_count] + AggregateExec: mode=FinalPartitioned, gby=[l_shipmode@0 as l_shipmode], aggr=[sum(CASE WHEN orders.o_orderpriority = Utf8("1-URGENT") OR orders.o_orderpriority = Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END), sum(CASE WHEN orders.o_orderpriority != Utf8("1-URGENT") AND orders.o_orderpriority != Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END)] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(l_orderkey@0, o_orderkey@0)], projection=[l_shipmode@1, o_orderpriority@3] - ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_shipmode@4 as l_shipmode] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: (l_shipmode@4 = FOB OR l_shipmode@4 = SHIP) AND l_receiptdate@3 > l_commitdate@2 AND l_shipdate@1 < l_commitdate@2 AND l_receiptdate@3 >= 1995-01-01 AND l_receiptdate@3 < 1996-01-01 - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_shipdate, l_commitdate, l_receiptdate, l_shipmode], predicate=(l_shipmode@14 = FOB OR l_shipmode@14 = SHIP) AND l_receiptdate@12 > l_commitdate@11 AND l_shipdate@10 < l_commitdate@11 AND l_receiptdate@12 >= 1995-01-01 AND l_receiptdate@12 < 1996-01-01, pruning_predicate=(CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= FOB AND FOB <= l_shipmode_max@1 END OR CASE WHEN 
l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= SHIP AND SHIP <= l_shipmode_max@1 END) AND CASE WHEN l_receiptdate_null_count@5 = l_receiptdate_row_count@6 THEN false ELSE l_receiptdate_max@4 >= 1995-01-01 END AND CASE WHEN l_receiptdate_null_count@5 = l_receiptdate_row_count@6 THEN false ELSE l_receiptdate_min@7 < 1996-01-01 END, required_guarantees=[l_shipmode in (SHIP, FOB)] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_orderpriority] + ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "l_shipmode", index: 0 }], 4)) + +Query Stage #4 (4 -> 1): +SortPreservingMergeExec: [l_shipmode@0 ASC NULLS LAST] + ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "l_shipmode", index: 0 }], 4)) diff --git a/testdata/expected-plans/q13.txt b/testdata/expected-plans/q13.txt index 4dc1238..cee4109 100644 --- a/testdata/expected-plans/q13.txt +++ b/testdata/expected-plans/q13.txt @@ -17,33 +17,62 @@ Sort: custdist DESC NULLS FIRST, c_orders.c_count DESC NULLS FIRST DataFusion Physical Plan ======================== -SortExec: expr=[custdist@1 DESC,c_count@0 DESC], preserve_partitioning=[false] - ProjectionExec: expr=[c_count@0 as c_count, count(*)@1 as custdist] - AggregateExec: mode=Single, gby=[c_count@0 as c_count], aggr=[count(*)] - ProjectionExec: expr=[count(orders.o_orderkey)@1 as c_count] - AggregateExec: mode=Single, gby=[c_custkey@0 as c_custkey], aggr=[count(orders.o_orderkey)] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Left, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, o_orderkey@1] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13212491]]}, projection=[c_custkey] - ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] - CoalesceBatchesExec: 
target_batch_size=8192 - FilterExec: o_comment@2 NOT LIKE %express%requests% - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_custkey, o_comment], predicate=o_comment@8 NOT LIKE %express%requests% +SortPreservingMergeExec: [custdist@1 DESC,c_count@0 DESC] + SortExec: expr=[custdist@1 DESC,c_count@0 DESC], preserve_partitioning=[true] + ProjectionExec: expr=[c_count@0 as c_count, count(*)@1 as custdist] + AggregateExec: mode=FinalPartitioned, gby=[c_count@0 as c_count], aggr=[count(*)] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([c_count@0], 4), input_partitions=4 + AggregateExec: mode=Partial, gby=[c_count@0 as c_count], aggr=[count(*)] + ProjectionExec: expr=[count(orders.o_orderkey)@1 as c_count] + AggregateExec: mode=SinglePartitioned, gby=[c_custkey@0 as c_custkey], aggr=[count(orders.o_orderkey)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Left, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, o_orderkey@1] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([c_custkey@0], 4), input_partitions=4 + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..3390424], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:3390424..6780848], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6780848..6782476, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..3388796], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:3388796..6779220]]}, projection=[c_custkey] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([o_custkey@1], 4), input_partitions=4 + ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as 
o_custkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: o_comment@2 NOT LIKE %express%requests% + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13634779], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13634779..27269558], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27269558..27275584, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13628753], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13628753..27263530]]}, projection=[o_orderkey, o_custkey, o_comment], predicate=o_comment@8 NOT LIKE %express%requests% RaySQL Plan =========== -Query Stage #0 (1 -> 1): -SortExec: expr=[custdist@1 DESC,c_count@0 DESC], preserve_partitioning=[false] - ProjectionExec: expr=[c_count@0 as c_count, count(*)@1 as custdist] - AggregateExec: mode=Single, gby=[c_count@0 as c_count], aggr=[count(*)] - ProjectionExec: expr=[count(orders.o_orderkey)@1 as c_count] - AggregateExec: mode=Single, gby=[c_custkey@0 as c_custkey], aggr=[count(orders.o_orderkey)] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Left, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, o_orderkey@1] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13212491]]}, projection=[c_custkey] - ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: o_comment@2 NOT LIKE %express%requests% - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_custkey, o_comment], predicate=o_comment@8 NOT LIKE %express%requests% +Query Stage #0 (4 -> 4): +ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: 
"c_custkey", index: 0 }], 4)) + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..3390424], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:3390424..6780848], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6780848..6782476, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..3388796], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:3388796..6779220]]}, projection=[c_custkey] + +Query Stage #1 (4 -> 4): +ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "o_custkey", index: 1 }], 4)) + ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: o_comment@2 NOT LIKE %express%requests% + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13634779], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13634779..27269558], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27269558..27275584, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13628753], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13628753..27263530]]}, projection=[o_orderkey, o_custkey, o_comment], predicate=o_comment@8 NOT LIKE %express%requests% + +Query Stage #2 (4 -> 4): +ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "c_count", index: 0 }], 4)) + AggregateExec: mode=Partial, gby=[c_count@0 as c_count], aggr=[count(*)] + ProjectionExec: expr=[count(orders.o_orderkey)@1 as c_count] + AggregateExec: mode=SinglePartitioned, gby=[c_custkey@0 as c_custkey], aggr=[count(orders.o_orderkey)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, 
join_type=Left, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, o_orderkey@1] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "o_custkey", index: 1 }], 4)) + +Query Stage #3 (4 -> 4): +ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "c_count", index: 0 }], 4)) + SortExec: expr=[custdist@1 DESC,c_count@0 DESC], preserve_partitioning=[true] + ProjectionExec: expr=[c_count@0 as c_count, count(*)@1 as custdist] + AggregateExec: mode=FinalPartitioned, gby=[c_count@0 as c_count], aggr=[count(*)] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "c_count", index: 0 }], 4)) + +Query Stage #4 (4 -> 1): +SortPreservingMergeExec: [custdist@1 DESC,c_count@0 DESC] + ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "c_count", index: 0 }], 4)) diff --git a/testdata/expected-plans/q14.txt b/testdata/expected-plans/q14.txt index 3c70553..b390e35 100644 --- a/testdata/expected-plans/q14.txt +++ b/testdata/expected-plans/q14.txt @@ -14,28 +14,51 @@ DataFusion Physical Plan ======================== ProjectionExec: expr=[100 * CAST(sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END)@0 AS Float64) / CAST(sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@1 AS Float64) as promo_revenue] - AggregateExec: mode=Single, gby=[], aggr=[sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] - ProjectionExec: expr=[l_extendedprice@1 * (Some(1),20,0 - l_discount@2) as __common_expr_2, p_type@0 as p_type] - CoalesceBatchesExec: 
target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, l_partkey@0)], projection=[p_type@1, l_extendedprice@3, l_discount@4] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_type] - ProjectionExec: expr=[l_partkey@0 as l_partkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_shipdate@3 >= 1995-02-01 AND l_shipdate@3 < 1995-03-01 - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_partkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1995-02-01 AND l_shipdate@10 < 1995-03-01, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1995-02-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1995-03-01 END, required_guarantees=[] + AggregateExec: mode=Final, gby=[], aggr=[sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] + CoalescePartitionsExec + AggregateExec: mode=Partial, gby=[], aggr=[sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] + ProjectionExec: expr=[l_extendedprice@1 * (Some(1),20,0 - l_discount@2) as __common_expr_2, p_type@0 as p_type] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, l_partkey@0)], projection=[p_type@1, l_extendedprice@3, l_discount@4] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([p_partkey@0], 4), input_partitions=4 + RepartitionExec: 
partitioning=RoundRobinBatch(4), input_partitions=2 + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_type] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([l_partkey@0], 4), input_partitions=4 + ProjectionExec: expr=[l_partkey@0 as l_partkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: l_shipdate@3 >= 1995-02-01 AND l_shipdate@3 < 1995-03-01 + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_partkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1995-02-01 AND l_shipdate@10 < 1995-03-01, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1995-02-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1995-03-01 END, required_guarantees=[] RaySQL Plan =========== -Query Stage #0 (1 -> 1): -ProjectionExec: expr=[100 * CAST(sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END)@0 AS Float64) / CAST(sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@1 AS Float64) as promo_revenue] - AggregateExec: mode=Single, gby=[], aggr=[sum(CASE WHEN part.p_type LIKE 
Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] +Query Stage #0 (2 -> 4): +ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_type] + +Query Stage #1 (4 -> 4): +ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "l_partkey", index: 0 }], 4)) + ProjectionExec: expr=[l_partkey@0 as l_partkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: l_shipdate@3 >= 1995-02-01 AND l_shipdate@3 < 1995-03-01 + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_partkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1995-02-01 AND l_shipdate@10 < 1995-03-01, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1995-02-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1995-03-01 END, required_guarantees=[] + +Query Stage #2 (4 -> 1): +ShuffleWriterExec(stage_id=2, output_partitioning=Hash([], 4)) + AggregateExec: mode=Partial, gby=[], aggr=[sum(CASE WHEN part.p_type 
LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] ProjectionExec: expr=[l_extendedprice@1 * (Some(1),20,0 - l_discount@2) as __common_expr_2, p_type@0 as p_type] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, l_partkey@0)], projection=[p_type@1, l_extendedprice@3, l_discount@4] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_type] - ProjectionExec: expr=[l_partkey@0 as l_partkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_shipdate@3 >= 1995-02-01 AND l_shipdate@3 < 1995-03-01 - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_partkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1995-02-01 AND l_shipdate@10 < 1995-03-01, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1995-02-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1995-03-01 END, required_guarantees=[] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, l_partkey@0)], projection=[p_type@1, l_extendedprice@3, l_discount@4] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "l_partkey", index: 0 }], 4)) + +Query Stage #3 (1 -> 1): +ProjectionExec: expr=[100 * CAST(sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END)@0 AS Float64) / 
CAST(sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@1 AS Float64) as promo_revenue] + AggregateExec: mode=Final, gby=[], aggr=[sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] + CoalescePartitionsExec + ShuffleReaderExec(stage_id=2, input_partitioning=Hash([], 4)) diff --git a/testdata/expected-plans/q16.txt b/testdata/expected-plans/q16.txt index 6713d19..72e90d3 100644 --- a/testdata/expected-plans/q16.txt +++ b/testdata/expected-plans/q16.txt @@ -19,43 +19,97 @@ Sort: supplier_cnt DESC NULLS FIRST, part.p_brand ASC NULLS LAST, part.p_type AS DataFusion Physical Plan ======================== -SortExec: expr=[supplier_cnt@3 DESC,p_brand@0 ASC NULLS LAST,p_type@1 ASC NULLS LAST,p_size@2 ASC NULLS LAST], preserve_partitioning=[false] - ProjectionExec: expr=[p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size, count(alias1)@3 as supplier_cnt] - AggregateExec: mode=Single, gby=[p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size], aggr=[count(alias1)] - AggregateExec: mode=Single, gby=[p_brand@1 as p_brand, p_type@2 as p_type, p_size@3 as p_size, ps_suppkey@0 as alias1], aggr=[] +SortPreservingMergeExec: [supplier_cnt@3 DESC,p_brand@0 ASC NULLS LAST,p_type@1 ASC NULLS LAST,p_size@2 ASC NULLS LAST] + SortExec: expr=[supplier_cnt@3 DESC,p_brand@0 ASC NULLS LAST,p_type@1 ASC NULLS LAST,p_size@2 ASC NULLS LAST], preserve_partitioning=[true] + ProjectionExec: expr=[p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size, count(alias1)@3 as supplier_cnt] + AggregateExec: mode=FinalPartitioned, gby=[p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size], aggr=[count(alias1)] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=RightAnti, on=[(s_suppkey@0, ps_suppkey@0)] - ProjectionExec: expr=[s_suppkey@0 as s_suppkey] - CoalesceBatchesExec: 
target_batch_size=8192 - FilterExec: s_comment@1 LIKE %Customer%Complaints% - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_comment], predicate=s_comment@6 LIKE %Customer%Complaints% - ProjectionExec: expr=[ps_suppkey@3 as ps_suppkey, p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, ps_partkey@0)], projection=[p_brand@1, p_type@2, p_size@3, ps_suppkey@5] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: p_brand@1 != Brand#14 AND p_type@2 NOT LIKE SMALL PLATED% AND Use p_size@3 IN (SET) ([Literal { value: Int32(14) }, Literal { value: Int32(6) }, Literal { value: Int32(5) }, Literal { value: Int32(31) }, Literal { value: Int32(49) }, Literal { value: Int32(15) }, Literal { value: Int32(41) }, Literal { value: Int32(47) }]) - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_brand, p_type, p_size], predicate=p_brand@3 != Brand#14 AND p_type@4 NOT LIKE SMALL PLATED% AND Use p_size@5 IN (SET) ([Literal { value: Int32(14) }, Literal { value: Int32(6) }, Literal { value: Int32(5) }, Literal { value: Int32(31) }, Literal { value: Int32(49) }, Literal { value: Int32(15) }, Literal { value: Int32(41) }, Literal { value: Int32(47) }]), pruning_predicate=CASE WHEN p_brand_null_count@2 = p_brand_row_count@3 THEN false ELSE p_brand_min@0 != Brand#14 OR Brand#14 != p_brand_max@1 END AND (CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 14 AND 14 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 6 AND 6 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 5 AND 5 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 
THEN false ELSE p_size_min@4 <= 31 AND 31 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 49 AND 49 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 15 AND 15 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 41 AND 41 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 47 AND 47 <= p_size_max@5 END), required_guarantees=[p_brand not in (Brand#14), p_size in (6, 31, 41, 15, 49, 5, 14, 47)] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet:0..41092331]]}, projection=[ps_partkey, ps_suppkey] + RepartitionExec: partitioning=Hash([p_brand@0, p_type@1, p_size@2], 4), input_partitions=4 + AggregateExec: mode=Partial, gby=[p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size], aggr=[count(alias1)] + AggregateExec: mode=FinalPartitioned, gby=[p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size, alias1@3 as alias1], aggr=[] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([p_brand@0, p_type@1, p_size@2, alias1@3], 4), input_partitions=4 + AggregateExec: mode=Partial, gby=[p_brand@1 as p_brand, p_type@2 as p_type, p_size@3 as p_size, ps_suppkey@0 as alias1], aggr=[] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=RightAnti, on=[(s_suppkey@0, ps_suppkey@0)] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=4 + ProjectionExec: expr=[s_suppkey@0 as s_suppkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: s_comment@1 LIKE %Customer%Complaints% + RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 + ParquetExec: file_groups={2 groups: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_comment], predicate=s_comment@6 LIKE %Customer%Complaints% + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([ps_suppkey@0], 4), input_partitions=4 + ProjectionExec: expr=[ps_suppkey@3 as ps_suppkey, p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, ps_partkey@0)], projection=[p_brand@1, p_type@2, p_size@3, ps_suppkey@5] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([p_partkey@0], 4), input_partitions=4 + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: p_brand@1 != Brand#14 AND p_type@2 NOT LIKE SMALL PLATED% AND Use p_size@3 IN (SET) ([Literal { value: Int32(14) }, Literal { value: Int32(6) }, Literal { value: Int32(5) }, Literal { value: Int32(31) }, Literal { value: Int32(49) }, Literal { value: Int32(15) }, Literal { value: Int32(41) }, Literal { value: Int32(47) }]) + RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_brand, p_type, p_size], predicate=p_brand@3 != Brand#14 AND p_type@4 NOT LIKE SMALL PLATED% AND Use p_size@5 IN (SET) ([Literal { value: Int32(14) }, Literal { value: Int32(6) }, Literal { value: Int32(5) }, Literal { value: Int32(31) }, Literal { value: Int32(49) }, Literal { value: Int32(15) }, Literal { value: Int32(41) }, Literal { value: Int32(47) }]), pruning_predicate=CASE WHEN p_brand_null_count@2 = p_brand_row_count@3 THEN false ELSE p_brand_min@0 != Brand#14 OR Brand#14 != p_brand_max@1 END AND 
(CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 14 AND 14 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 6 AND 6 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 5 AND 5 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 31 AND 31 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 49 AND 49 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 15 AND 15 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 41 AND 41 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 47 AND 47 <= p_size_max@5 END), required_guarantees=[p_brand not in (Brand#14), p_size in (5, 31, 49, 47, 15, 14, 41, 6)] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([ps_partkey@0], 4), input_partitions=4 + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10445614], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:10445614..20890506, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..722], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:722..10446336], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:10446336..20891947]]}, projection=[ps_partkey, ps_suppkey] RaySQL Plan =========== -Query Stage #0 (1 -> 1): -SortExec: expr=[supplier_cnt@3 DESC,p_brand@0 ASC NULLS LAST,p_type@1 ASC NULLS LAST,p_size@2 ASC NULLS LAST], preserve_partitioning=[false] - ProjectionExec: expr=[p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size, 
count(alias1)@3 as supplier_cnt] - AggregateExec: mode=Single, gby=[p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size], aggr=[count(alias1)] - AggregateExec: mode=Single, gby=[p_brand@1 as p_brand, p_type@2 as p_type, p_size@3 as p_size, ps_suppkey@0 as alias1], aggr=[] +Query Stage #0 (2 -> 4): +ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) + ProjectionExec: expr=[s_suppkey@0 as s_suppkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: s_comment@1 LIKE %Customer%Complaints% + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_comment], predicate=s_comment@6 LIKE %Customer%Complaints% + +Query Stage #1 (2 -> 4): +ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: p_brand@1 != Brand#14 AND p_type@2 NOT LIKE SMALL PLATED% AND Use p_size@3 IN (SET) ([Literal { value: Int32(14) }, Literal { value: Int32(6) }, Literal { value: Int32(5) }, Literal { value: Int32(31) }, Literal { value: Int32(49) }, Literal { value: Int32(15) }, Literal { value: Int32(41) }, Literal { value: Int32(47) }]) + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_brand, p_type, p_size], predicate=p_brand@3 != Brand#14 AND p_type@4 NOT LIKE SMALL PLATED% AND Use p_size@5 IN (SET) ([Literal { value: Int32(14) }, Literal { value: Int32(6) }, Literal { value: Int32(5) }, Literal { value: Int32(31) }, Literal { value: Int32(49) }, Literal { value: Int32(15) }, Literal { value: Int32(41) }, Literal { value: Int32(47) }]), pruning_predicate=CASE WHEN 
p_brand_null_count@2 = p_brand_row_count@3 THEN false ELSE p_brand_min@0 != Brand#14 OR Brand#14 != p_brand_max@1 END AND (CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 14 AND 14 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 6 AND 6 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 5 AND 5 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 31 AND 31 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 49 AND 49 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 15 AND 15 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 41 AND 41 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 47 AND 47 <= p_size_max@5 END), required_guarantees=[p_brand not in (Brand#14), p_size in (5, 31, 49, 47, 15, 14, 41, 6)] + +Query Stage #2 (4 -> 4): +ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 4)) + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10445614], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:10445614..20890506, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..722], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:722..10446336], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:10446336..20891947]]}, projection=[ps_partkey, ps_suppkey] + +Query Stage #3 (4 -> 4): +ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "ps_suppkey", index: 0 }], 4)) + ProjectionExec: 
expr=[ps_suppkey@3 as ps_suppkey, p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, ps_partkey@0)], projection=[p_brand@1, p_type@2, p_size@3, ps_suppkey@5] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 4)) + +Query Stage #4 (4 -> 4): +ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "p_brand", index: 0 }, Column { name: "p_type", index: 1 }, Column { name: "p_size", index: 2 }, Column { name: "alias1", index: 3 }], 4)) + AggregateExec: mode=Partial, gby=[p_brand@1 as p_brand, p_type@2 as p_type, p_size@3 as p_size, ps_suppkey@0 as alias1], aggr=[] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=RightAnti, on=[(s_suppkey@0, ps_suppkey@0)] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=RightAnti, on=[(s_suppkey@0, ps_suppkey@0)] - ProjectionExec: expr=[s_suppkey@0 as s_suppkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: s_comment@1 LIKE %Customer%Complaints% - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_comment], predicate=s_comment@6 LIKE %Customer%Complaints% - ProjectionExec: expr=[ps_suppkey@3 as ps_suppkey, p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, ps_partkey@0)], projection=[p_brand@1, p_type@2, p_size@3, ps_suppkey@5] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: p_brand@1 != Brand#14 AND p_type@2 NOT LIKE SMALL PLATED% AND Use p_size@3 
IN (SET) ([Literal { value: Int32(14) }, Literal { value: Int32(6) }, Literal { value: Int32(5) }, Literal { value: Int32(31) }, Literal { value: Int32(49) }, Literal { value: Int32(15) }, Literal { value: Int32(41) }, Literal { value: Int32(47) }]) - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_brand, p_type, p_size], predicate=p_brand@3 != Brand#14 AND p_type@4 NOT LIKE SMALL PLATED% AND Use p_size@5 IN (SET) ([Literal { value: Int32(14) }, Literal { value: Int32(6) }, Literal { value: Int32(5) }, Literal { value: Int32(31) }, Literal { value: Int32(49) }, Literal { value: Int32(15) }, Literal { value: Int32(41) }, Literal { value: Int32(47) }]), pruning_predicate=CASE WHEN p_brand_null_count@2 = p_brand_row_count@3 THEN false ELSE p_brand_min@0 != Brand#14 OR Brand#14 != p_brand_max@1 END AND (CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 14 AND 14 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 6 AND 6 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 5 AND 5 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 31 AND 31 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 49 AND 49 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 15 AND 15 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 41 AND 41 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 47 AND 47 <= p_size_max@5 END), required_guarantees=[p_brand not in (Brand#14), p_size in (6, 31, 41, 15, 49, 5, 14, 47)] - ParquetExec: file_groups={1 group: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet:0..41092331]]}, projection=[ps_partkey, ps_suppkey] + ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "ps_suppkey", index: 0 }], 4)) + +Query Stage #5 (4 -> 4): +ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "p_brand", index: 0 }, Column { name: "p_type", index: 1 }, Column { name: "p_size", index: 2 }], 4)) + AggregateExec: mode=Partial, gby=[p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size], aggr=[count(alias1)] + AggregateExec: mode=FinalPartitioned, gby=[p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size, alias1@3 as alias1], aggr=[] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=4, input_partitioning=Hash([Column { name: "p_brand", index: 0 }, Column { name: "p_type", index: 1 }, Column { name: "p_size", index: 2 }, Column { name: "alias1", index: 3 }], 4)) + +Query Stage #6 (4 -> 4): +ShuffleWriterExec(stage_id=6, output_partitioning=Hash([Column { name: "p_brand", index: 0 }, Column { name: "p_type", index: 1 }, Column { name: "p_size", index: 2 }], 4)) + SortExec: expr=[supplier_cnt@3 DESC,p_brand@0 ASC NULLS LAST,p_type@1 ASC NULLS LAST,p_size@2 ASC NULLS LAST], preserve_partitioning=[true] + ProjectionExec: expr=[p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size, count(alias1)@3 as supplier_cnt] + AggregateExec: mode=FinalPartitioned, gby=[p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size], aggr=[count(alias1)] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=5, input_partitioning=Hash([Column { name: "p_brand", index: 0 }, Column { name: "p_type", index: 1 }, Column { name: "p_size", index: 2 }], 4)) + +Query Stage #7 (4 -> 1): +SortPreservingMergeExec: [supplier_cnt@3 DESC,p_brand@0 ASC NULLS 
LAST,p_type@1 ASC NULLS LAST,p_size@2 ASC NULLS LAST] + ShuffleReaderExec(stage_id=6, input_partitioning=Hash([Column { name: "p_brand", index: 0 }, Column { name: "p_type", index: 1 }, Column { name: "p_size", index: 2 }], 4)) diff --git a/testdata/expected-plans/q17.txt b/testdata/expected-plans/q17.txt index 251bc45..b159323 100644 --- a/testdata/expected-plans/q17.txt +++ b/testdata/expected-plans/q17.txt @@ -20,38 +20,70 @@ DataFusion Physical Plan ======================== ProjectionExec: expr=[CAST(sum(lineitem.l_extendedprice)@0 AS Float64) / 7 as avg_yearly] - AggregateExec: mode=Single, gby=[], aggr=[sum(lineitem.l_extendedprice)] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@2, l_partkey@1)], filter=CAST(l_quantity@0 AS Decimal128(30, 15)) < Float64(0.2) * avg(lineitem.l_quantity)@1, projection=[l_extendedprice@1] - ProjectionExec: expr=[l_quantity@1 as l_quantity, l_extendedprice@2 as l_extendedprice, p_partkey@0 as p_partkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, l_partkey@0)], projection=[p_partkey@0, l_quantity@2, l_extendedprice@3] - ProjectionExec: expr=[p_partkey@0 as p_partkey] + AggregateExec: mode=Final, gby=[], aggr=[sum(lineitem.l_extendedprice)] + CoalescePartitionsExec + AggregateExec: mode=Partial, gby=[], aggr=[sum(lineitem.l_extendedprice)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@2, l_partkey@1)], filter=CAST(l_quantity@0 AS Decimal128(30, 15)) < Float64(0.2) * avg(lineitem.l_quantity)@1, projection=[l_extendedprice@1] + ProjectionExec: expr=[l_quantity@1 as l_quantity, l_extendedprice@2 as l_extendedprice, p_partkey@0 as p_partkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, l_partkey@0)], projection=[p_partkey@0, l_quantity@2, l_extendedprice@3] + 
CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([p_partkey@0], 4), input_partitions=4 + ProjectionExec: expr=[p_partkey@0 as p_partkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: p_brand@1 = Brand#42 AND p_container@2 = LG BAG + RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_brand, p_container], predicate=p_brand@3 = Brand#42 AND p_container@6 = LG BAG, pruning_predicate=CASE WHEN p_brand_null_count@2 = p_brand_row_count@3 THEN false ELSE p_brand_min@0 <= Brand#42 AND Brand#42 <= p_brand_max@1 END AND CASE WHEN p_container_null_count@6 = p_container_row_count@7 THEN false ELSE p_container_min@4 <= LG BAG AND LG BAG <= p_container_max@5 END, required_guarantees=[p_brand in (Brand#42), p_container in (LG BAG)] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([l_partkey@0], 4), input_partitions=4 + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_partkey, l_quantity, l_extendedprice] + ProjectionExec: expr=[CAST(0.2 * CAST(avg(lineitem.l_quantity)@1 AS Float64) AS Decimal128(30, 15)) as Float64(0.2) * avg(lineitem.l_quantity), l_partkey@0 as l_partkey] + AggregateExec: mode=FinalPartitioned, gby=[l_partkey@0 as l_partkey], 
aggr=[avg(lineitem.l_quantity)] CoalesceBatchesExec: target_batch_size=8192 - FilterExec: p_brand@1 = Brand#42 AND p_container@2 = LG BAG - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_brand, p_container], predicate=p_brand@3 = Brand#42 AND p_container@6 = LG BAG, pruning_predicate=CASE WHEN p_brand_null_count@2 = p_brand_row_count@3 THEN false ELSE p_brand_min@0 <= Brand#42 AND Brand#42 <= p_brand_max@1 END AND CASE WHEN p_container_null_count@6 = p_container_row_count@7 THEN false ELSE p_container_min@4 <= LG BAG AND LG BAG <= p_container_max@5 END, required_guarantees=[p_brand in (Brand#42), p_container in (LG BAG)] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_partkey, l_quantity, l_extendedprice] - ProjectionExec: expr=[CAST(0.2 * CAST(avg(lineitem.l_quantity)@1 AS Float64) AS Decimal128(30, 15)) as Float64(0.2) * avg(lineitem.l_quantity), l_partkey@0 as l_partkey] - AggregateExec: mode=Single, gby=[l_partkey@0 as l_partkey], aggr=[avg(lineitem.l_quantity)] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_partkey, l_quantity] + RepartitionExec: partitioning=Hash([l_partkey@0], 4), input_partitions=4 + AggregateExec: mode=Partial, gby=[l_partkey@0 as l_partkey], aggr=[avg(lineitem.l_quantity)] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], 
[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_partkey, l_quantity] RaySQL Plan =========== -Query Stage #0 (1 -> 1): -ProjectionExec: expr=[CAST(sum(lineitem.l_extendedprice)@0 AS Float64) / 7 as avg_yearly] - AggregateExec: mode=Single, gby=[], aggr=[sum(lineitem.l_extendedprice)] +Query Stage #0 (2 -> 4): +ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) + ProjectionExec: expr=[p_partkey@0 as p_partkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: p_brand@1 = Brand#42 AND p_container@2 = LG BAG + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_brand, p_container], predicate=p_brand@3 = Brand#42 AND p_container@6 = LG BAG, pruning_predicate=CASE WHEN p_brand_null_count@2 = p_brand_row_count@3 THEN false ELSE p_brand_min@0 <= Brand#42 AND Brand#42 <= p_brand_max@1 END AND CASE WHEN p_container_null_count@6 = p_container_row_count@7 THEN false ELSE p_container_min@4 <= LG BAG AND LG BAG <= p_container_max@5 END, required_guarantees=[p_brand in (Brand#42), p_container in (LG BAG)] + +Query Stage #1 (4 -> 4): +ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "l_partkey", index: 0 }], 4)) + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, 
projection=[l_partkey, l_quantity, l_extendedprice] + +Query Stage #2 (4 -> 4): +ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "l_partkey", index: 0 }], 4)) + AggregateExec: mode=Partial, gby=[l_partkey@0 as l_partkey], aggr=[avg(lineitem.l_quantity)] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_partkey, l_quantity] + +Query Stage #3 (4 -> 1): +ShuffleWriterExec(stage_id=3, output_partitioning=Hash([], 4)) + AggregateExec: mode=Partial, gby=[], aggr=[sum(lineitem.l_extendedprice)] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@2, l_partkey@1)], filter=CAST(l_quantity@0 AS Decimal128(30, 15)) < Float64(0.2) * avg(lineitem.l_quantity)@1, projection=[l_extendedprice@1] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@2, l_partkey@1)], filter=CAST(l_quantity@0 AS Decimal128(30, 15)) < Float64(0.2) * avg(lineitem.l_quantity)@1, projection=[l_extendedprice@1] ProjectionExec: expr=[l_quantity@1 as l_quantity, l_extendedprice@2 as l_extendedprice, p_partkey@0 as p_partkey] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, l_partkey@0)], projection=[p_partkey@0, l_quantity@2, l_extendedprice@3] - ProjectionExec: expr=[p_partkey@0 as p_partkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: p_brand@1 = Brand#42 AND p_container@2 = LG BAG - ParquetExec: file_groups={1 group: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_brand, p_container], predicate=p_brand@3 = Brand#42 AND p_container@6 = LG BAG, pruning_predicate=CASE WHEN p_brand_null_count@2 = p_brand_row_count@3 THEN false ELSE p_brand_min@0 <= Brand#42 AND Brand#42 <= p_brand_max@1 END AND CASE WHEN p_container_null_count@6 = p_container_row_count@7 THEN false ELSE p_container_min@4 <= LG BAG AND LG BAG <= p_container_max@5 END, required_guarantees=[p_brand in (Brand#42), p_container in (LG BAG)] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_partkey, l_quantity, l_extendedprice] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, l_partkey@0)], projection=[p_partkey@0, l_quantity@2, l_extendedprice@3] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "l_partkey", index: 0 }], 4)) ProjectionExec: expr=[CAST(0.2 * CAST(avg(lineitem.l_quantity)@1 AS Float64) AS Decimal128(30, 15)) as Float64(0.2) * avg(lineitem.l_quantity), l_partkey@0 as l_partkey] - AggregateExec: mode=Single, gby=[l_partkey@0 as l_partkey], aggr=[avg(lineitem.l_quantity)] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_partkey, l_quantity] + AggregateExec: mode=FinalPartitioned, gby=[l_partkey@0 as l_partkey], aggr=[avg(lineitem.l_quantity)] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "l_partkey", index: 0 }], 4)) + +Query Stage #4 (1 -> 1): +ProjectionExec: expr=[CAST(sum(lineitem.l_extendedprice)@0 AS Float64) / 7 as avg_yearly] + AggregateExec: mode=Final, gby=[], 
aggr=[sum(lineitem.l_extendedprice)] + CoalescePartitionsExec + ShuffleReaderExec(stage_id=3, input_partitioning=Hash([], 4)) diff --git a/testdata/expected-plans/q18.txt b/testdata/expected-plans/q18.txt index 659fb02..a866530 100644 --- a/testdata/expected-plans/q18.txt +++ b/testdata/expected-plans/q18.txt @@ -21,41 +21,95 @@ Limit: skip=0, fetch=100 DataFusion Physical Plan ======================== -SortExec: TopK(fetch=100), expr=[o_totalprice@4 DESC,o_orderdate@3 ASC NULLS LAST], preserve_partitioning=[false] - AggregateExec: mode=Single, gby=[c_name@1 as c_name, c_custkey@0 as c_custkey, o_orderkey@2 as o_orderkey, o_orderdate@4 as o_orderdate, o_totalprice@3 as o_totalprice], aggr=[sum(lineitem.l_quantity)] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=RightSemi, on=[(l_orderkey@0, o_orderkey@2)] - ProjectionExec: expr=[l_orderkey@0 as l_orderkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: sum(lineitem.l_quantity)@1 > Some(31300),21,2 - AggregateExec: mode=Single, gby=[l_orderkey@0 as l_orderkey], aggr=[sum(lineitem.l_quantity)] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_quantity] +GlobalLimitExec: skip=0, fetch=100 + SortPreservingMergeExec: [o_totalprice@4 DESC,o_orderdate@3 ASC NULLS LAST], fetch=100 + SortExec: TopK(fetch=100), expr=[o_totalprice@4 DESC,o_orderdate@3 ASC NULLS LAST], preserve_partitioning=[true] + AggregateExec: mode=FinalPartitioned, gby=[c_name@0 as c_name, c_custkey@1 as c_custkey, o_orderkey@2 as o_orderkey, o_orderdate@3 as o_orderdate, o_totalprice@4 as o_totalprice], aggr=[sum(lineitem.l_quantity)] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(o_orderkey@2, l_orderkey@0)], projection=[c_custkey@0, c_name@1, o_orderkey@2, o_totalprice@3, o_orderdate@4, l_quantity@6] - CoalesceBatchesExec: target_batch_size=8192 
- HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, c_name@1, o_orderkey@2, o_totalprice@4, o_orderdate@5] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13212491]]}, projection=[c_custkey, c_name] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_custkey, o_totalprice, o_orderdate] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_quantity] + RepartitionExec: partitioning=Hash([c_name@0, c_custkey@1, o_orderkey@2, o_orderdate@3, o_totalprice@4], 4), input_partitions=4 + AggregateExec: mode=Partial, gby=[c_name@1 as c_name, c_custkey@0 as c_custkey, o_orderkey@2 as o_orderkey, o_orderdate@4 as o_orderdate, o_totalprice@3 as o_totalprice], aggr=[sum(lineitem.l_quantity)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=RightSemi, on=[(l_orderkey@0, o_orderkey@2)] + ProjectionExec: expr=[l_orderkey@0 as l_orderkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: sum(lineitem.l_quantity)@1 > Some(31300),21,2 + AggregateExec: mode=FinalPartitioned, gby=[l_orderkey@0 as l_orderkey], aggr=[sum(lineitem.l_quantity)] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4 + AggregateExec: mode=Partial, gby=[l_orderkey@0 as l_orderkey], aggr=[sum(lineitem.l_quantity)] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], 
[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_orderkey, l_quantity] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@2, l_orderkey@0)], projection=[c_custkey@0, c_name@1, o_orderkey@2, o_totalprice@3, o_orderdate@4, l_quantity@6] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([o_orderkey@2], 4), input_partitions=4 + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, c_name@1, o_orderkey@2, o_totalprice@4, o_orderdate@5] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([c_custkey@0], 4), input_partitions=4 + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..3390424], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:3390424..6780848], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6780848..6782476, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..3388796], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:3388796..6779220]]}, projection=[c_custkey, c_name] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([o_custkey@1], 4), input_partitions=4 + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13634779], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13634779..27269558], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27269558..27275584, 
home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13628753], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13628753..27263530]]}, projection=[o_orderkey, o_custkey, o_totalprice, o_orderdate] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4 + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_orderkey, l_quantity] RaySQL Plan =========== -Query Stage #0 (1 -> 1): -SortExec: TopK(fetch=100), expr=[o_totalprice@4 DESC,o_orderdate@3 ASC NULLS LAST], preserve_partitioning=[false] - AggregateExec: mode=Single, gby=[c_name@1 as c_name, c_custkey@0 as c_custkey, o_orderkey@2 as o_orderkey, o_orderdate@4 as o_orderdate, o_totalprice@3 as o_totalprice], aggr=[sum(lineitem.l_quantity)] +Query Stage #0 (4 -> 4): +ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) + AggregateExec: mode=Partial, gby=[l_orderkey@0 as l_orderkey], aggr=[sum(lineitem.l_quantity)] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], 
[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_orderkey, l_quantity] + +Query Stage #1 (4 -> 4): +ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..3390424], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:3390424..6780848], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6780848..6782476, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..3388796], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:3388796..6779220]]}, projection=[c_custkey, c_name] + +Query Stage #2 (4 -> 4): +ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "o_custkey", index: 1 }], 4)) + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13634779], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13634779..27269558], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27269558..27275584, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13628753], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13628753..27263530]]}, projection=[o_orderkey, o_custkey, o_totalprice, o_orderdate] + +Query Stage #3 (4 -> 4): +ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "o_orderkey", index: 2 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, c_name@1, o_orderkey@2, o_totalprice@4, o_orderdate@5] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=1, 
input_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "o_custkey", index: 1 }], 4)) + +Query Stage #4 (4 -> 4): +ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_orderkey, l_quantity] + +Query Stage #5 (4 -> 4): +ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "c_name", index: 0 }, Column { name: "c_custkey", index: 1 }, Column { name: "o_orderkey", index: 2 }, Column { name: "o_orderdate", index: 3 }, Column { name: "o_totalprice", index: 4 }], 4)) + AggregateExec: mode=Partial, gby=[c_name@1 as c_name, c_custkey@0 as c_custkey, o_orderkey@2 as o_orderkey, o_orderdate@4 as o_orderdate, o_totalprice@3 as o_totalprice], aggr=[sum(lineitem.l_quantity)] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=RightSemi, on=[(l_orderkey@0, o_orderkey@2)] + HashJoinExec: mode=Partitioned, join_type=RightSemi, on=[(l_orderkey@0, o_orderkey@2)] ProjectionExec: expr=[l_orderkey@0 as l_orderkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: sum(lineitem.l_quantity)@1 > Some(31300),21,2 - AggregateExec: mode=Single, gby=[l_orderkey@0 as l_orderkey], aggr=[sum(lineitem.l_quantity)] - ParquetExec: file_groups={1 group: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_quantity] + AggregateExec: mode=FinalPartitioned, gby=[l_orderkey@0 as l_orderkey], aggr=[sum(lineitem.l_quantity)] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(o_orderkey@2, l_orderkey@0)], projection=[c_custkey@0, c_name@1, o_orderkey@2, o_totalprice@3, o_orderdate@4, l_quantity@6] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@2, l_orderkey@0)], projection=[c_custkey@0, c_name@1, o_orderkey@2, o_totalprice@3, o_orderdate@4, l_quantity@6] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, c_name@1, o_orderkey@2, o_totalprice@4, o_orderdate@5] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13212491]]}, projection=[c_custkey, c_name] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_custkey, o_totalprice, o_orderdate] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_quantity] + ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "o_orderkey", index: 2 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=4, input_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) + +Query Stage #6 (4 -> 4): +ShuffleWriterExec(stage_id=6, output_partitioning=Hash([Column { name: "c_name", index: 0 }, Column { name: "c_custkey", index: 1 }, Column { name: "o_orderkey", index: 2 }, Column { name: "o_orderdate", index: 3 }, Column { 
name: "o_totalprice", index: 4 }], 4)) + SortExec: TopK(fetch=100), expr=[o_totalprice@4 DESC,o_orderdate@3 ASC NULLS LAST], preserve_partitioning=[true] + AggregateExec: mode=FinalPartitioned, gby=[c_name@0 as c_name, c_custkey@1 as c_custkey, o_orderkey@2 as o_orderkey, o_orderdate@3 as o_orderdate, o_totalprice@4 as o_totalprice], aggr=[sum(lineitem.l_quantity)] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=5, input_partitioning=Hash([Column { name: "c_name", index: 0 }, Column { name: "c_custkey", index: 1 }, Column { name: "o_orderkey", index: 2 }, Column { name: "o_orderdate", index: 3 }, Column { name: "o_totalprice", index: 4 }], 4)) + +Query Stage #7 (1 -> 1): +GlobalLimitExec: skip=0, fetch=100 + SortPreservingMergeExec: [o_totalprice@4 DESC,o_orderdate@3 ASC NULLS LAST], fetch=100 + ShuffleReaderExec(stage_id=6, input_partitioning=Hash([Column { name: "c_name", index: 0 }, Column { name: "c_custkey", index: 1 }, Column { name: "o_orderkey", index: 2 }, Column { name: "o_orderdate", index: 3 }, Column { name: "o_totalprice", index: 4 }], 4)) diff --git a/testdata/expected-plans/q19.txt b/testdata/expected-plans/q19.txt index 748af1f..b7f0b76 100644 --- a/testdata/expected-plans/q19.txt +++ b/testdata/expected-plans/q19.txt @@ -15,30 +15,53 @@ DataFusion Physical Plan ======================== ProjectionExec: expr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@0 as revenue] - AggregateExec: mode=Single, gby=[], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, l_partkey@0)], filter=p_brand@1 = Brand#21 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("SM CASE") }, Literal { value: Utf8("SM BOX") }, Literal { value: Utf8("SM PACK") }, Literal { value: Utf8("SM PKG") }]) AND l_quantity@0 >= Some(800),11,2 AND l_quantity@0 <= Some(1800),11,2 AND p_size@2 <= 5 OR p_brand@1 = 
Brand#13 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("MED BAG") }, Literal { value: Utf8("MED BOX") }, Literal { value: Utf8("MED PKG") }, Literal { value: Utf8("MED PACK") }]) AND l_quantity@0 >= Some(2000),11,2 AND l_quantity@0 <= Some(3000),11,2 AND p_size@2 <= 10 OR p_brand@1 = Brand#52 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("LG CASE") }, Literal { value: Utf8("LG BOX") }, Literal { value: Utf8("LG PACK") }, Literal { value: Utf8("LG PKG") }]) AND l_quantity@0 >= Some(3000),11,2 AND l_quantity@0 <= Some(4000),11,2 AND p_size@2 <= 15, projection=[l_extendedprice@6, l_discount@7] + AggregateExec: mode=Final, gby=[], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] + CoalescePartitionsExec + AggregateExec: mode=Partial, gby=[], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] CoalesceBatchesExec: target_batch_size=8192 - FilterExec: (p_brand@1 = Brand#21 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("SM CASE") }, Literal { value: Utf8("SM BOX") }, Literal { value: Utf8("SM PACK") }, Literal { value: Utf8("SM PKG") }]) AND p_size@2 <= 5 OR p_brand@1 = Brand#13 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("MED BAG") }, Literal { value: Utf8("MED BOX") }, Literal { value: Utf8("MED PKG") }, Literal { value: Utf8("MED PACK") }]) AND p_size@2 <= 10 OR p_brand@1 = Brand#52 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("LG CASE") }, Literal { value: Utf8("LG BOX") }, Literal { value: Utf8("LG PACK") }, Literal { value: Utf8("LG PKG") }]) AND p_size@2 <= 15) AND p_size@2 >= 1 - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_brand, p_size, p_container], predicate=p_size@5 >= 1 AND (p_brand@3 = Brand#21 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("SM CASE") }, Literal { value: Utf8("SM BOX") }, Literal { value: Utf8("SM PACK") }, Literal { value: Utf8("SM PKG") }]) AND p_size@5 <= 5 OR 
p_brand@3 = Brand#13 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("MED BAG") }, Literal { value: Utf8("MED BOX") }, Literal { value: Utf8("MED PKG") }, Literal { value: Utf8("MED PACK") }]) AND p_size@5 <= 10 OR p_brand@3 = Brand#52 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("LG CASE") }, Literal { value: Utf8("LG BOX") }, Literal { value: Utf8("LG PACK") }, Literal { value: Utf8("LG PKG") }]) AND p_size@5 <= 15), pruning_predicate=CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_max@0 >= 1 END AND (CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#21 AND Brand#21 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM CASE AND SM CASE <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM BOX AND SM BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM PACK AND SM PACK <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM PKG AND SM PKG <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 5 END OR CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#13 AND Brand#13 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED BAG AND MED BAG <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED BOX AND MED BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED PKG AND MED PKG <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = 
p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED PACK AND MED PACK <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 10 END OR CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#52 AND Brand#52 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG CASE AND LG CASE <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG BOX AND LG BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG PACK AND LG PACK <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG PKG AND LG PKG <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 15 END), required_guarantees=[] - ProjectionExec: expr=[l_partkey@0 as l_partkey, l_quantity@1 as l_quantity, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: (l_quantity@1 >= Some(800),11,2 AND l_quantity@1 <= Some(1800),11,2 OR l_quantity@1 >= Some(2000),11,2 AND l_quantity@1 <= Some(3000),11,2 OR l_quantity@1 >= Some(3000),11,2 AND l_quantity@1 <= Some(4000),11,2) AND (l_shipmode@5 = AIR OR l_shipmode@5 = AIR REG) AND l_shipinstruct@4 = DELIVER IN PERSON - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_partkey, l_quantity, l_extendedprice, l_discount, l_shipinstruct, l_shipmode], predicate=(l_shipmode@14 = AIR OR l_shipmode@14 = AIR REG) AND l_shipinstruct@13 = DELIVER IN PERSON AND (l_quantity@4 >= Some(800),11,2 AND l_quantity@4 <= Some(1800),11,2 OR l_quantity@4 >= Some(2000),11,2 AND l_quantity@4 <= 
Some(3000),11,2 OR l_quantity@4 >= Some(3000),11,2 AND l_quantity@4 <= Some(4000),11,2), pruning_predicate=(CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= AIR AND AIR <= l_shipmode_max@1 END OR CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= AIR REG AND AIR REG <= l_shipmode_max@1 END) AND CASE WHEN l_shipinstruct_null_count@6 = l_shipinstruct_row_count@7 THEN false ELSE l_shipinstruct_min@4 <= DELIVER IN PERSON AND DELIVER IN PERSON <= l_shipinstruct_max@5 END AND (CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(800),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(1800),11,2 END OR CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(2000),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(3000),11,2 END OR CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(3000),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(4000),11,2 END), required_guarantees=[l_shipmode in (AIR REG, AIR), l_shipinstruct in (DELIVER IN PERSON)] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, l_partkey@0)], filter=p_brand@1 = Brand#21 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("SM CASE") }, Literal { value: Utf8("SM BOX") }, Literal { value: Utf8("SM PACK") }, Literal { value: Utf8("SM PKG") }]) AND l_quantity@0 >= Some(800),11,2 AND l_quantity@0 <= Some(1800),11,2 AND p_size@2 <= 5 OR p_brand@1 = Brand#13 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("MED BAG") }, Literal { value: Utf8("MED BOX") }, Literal { value: Utf8("MED PKG") }, Literal { value: Utf8("MED PACK") }]) AND l_quantity@0 >= Some(2000),11,2 AND 
l_quantity@0 <= Some(3000),11,2 AND p_size@2 <= 10 OR p_brand@1 = Brand#52 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("LG CASE") }, Literal { value: Utf8("LG BOX") }, Literal { value: Utf8("LG PACK") }, Literal { value: Utf8("LG PKG") }]) AND l_quantity@0 >= Some(3000),11,2 AND l_quantity@0 <= Some(4000),11,2 AND p_size@2 <= 15, projection=[l_extendedprice@6, l_discount@7] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([p_partkey@0], 4), input_partitions=4 + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: (p_brand@1 = Brand#21 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("SM CASE") }, Literal { value: Utf8("SM BOX") }, Literal { value: Utf8("SM PACK") }, Literal { value: Utf8("SM PKG") }]) AND p_size@2 <= 5 OR p_brand@1 = Brand#13 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("MED BAG") }, Literal { value: Utf8("MED BOX") }, Literal { value: Utf8("MED PKG") }, Literal { value: Utf8("MED PACK") }]) AND p_size@2 <= 10 OR p_brand@1 = Brand#52 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("LG CASE") }, Literal { value: Utf8("LG BOX") }, Literal { value: Utf8("LG PACK") }, Literal { value: Utf8("LG PKG") }]) AND p_size@2 <= 15) AND p_size@2 >= 1 + RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_brand, p_size, p_container], predicate=p_size@5 >= 1 AND (p_brand@3 = Brand#21 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("SM CASE") }, Literal { value: Utf8("SM BOX") }, Literal { value: Utf8("SM PACK") }, Literal { value: Utf8("SM PKG") }]) AND p_size@5 <= 5 OR p_brand@3 = Brand#13 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("MED BAG") }, Literal { value: Utf8("MED BOX") }, Literal { value: Utf8("MED PKG") }, Literal { 
value: Utf8("MED PACK") }]) AND p_size@5 <= 10 OR p_brand@3 = Brand#52 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("LG CASE") }, Literal { value: Utf8("LG BOX") }, Literal { value: Utf8("LG PACK") }, Literal { value: Utf8("LG PKG") }]) AND p_size@5 <= 15), pruning_predicate=CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_max@0 >= 1 END AND (CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#21 AND Brand#21 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM CASE AND SM CASE <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM BOX AND SM BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM PACK AND SM PACK <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM PKG AND SM PKG <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 5 END OR CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#13 AND Brand#13 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED BAG AND MED BAG <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED BOX AND MED BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED PKG AND MED PKG <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED PACK AND MED PACK <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 
<= 10 END OR CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#52 AND Brand#52 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG CASE AND LG CASE <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG BOX AND LG BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG PACK AND LG PACK <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG PKG AND LG PKG <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 15 END), required_guarantees=[] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([l_partkey@0], 4), input_partitions=4 + ProjectionExec: expr=[l_partkey@0 as l_partkey, l_quantity@1 as l_quantity, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: (l_quantity@1 >= Some(800),11,2 AND l_quantity@1 <= Some(1800),11,2 OR l_quantity@1 >= Some(2000),11,2 AND l_quantity@1 <= Some(3000),11,2 OR l_quantity@1 >= Some(3000),11,2 AND l_quantity@1 <= Some(4000),11,2) AND (l_shipmode@5 = AIR OR l_shipmode@5 = AIR REG) AND l_shipinstruct@4 = DELIVER IN PERSON + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], 
[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_partkey, l_quantity, l_extendedprice, l_discount, l_shipinstruct, l_shipmode], predicate=(l_shipmode@14 = AIR OR l_shipmode@14 = AIR REG) AND l_shipinstruct@13 = DELIVER IN PERSON AND (l_quantity@4 >= Some(800),11,2 AND l_quantity@4 <= Some(1800),11,2 OR l_quantity@4 >= Some(2000),11,2 AND l_quantity@4 <= Some(3000),11,2 OR l_quantity@4 >= Some(3000),11,2 AND l_quantity@4 <= Some(4000),11,2), pruning_predicate=(CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= AIR AND AIR <= l_shipmode_max@1 END OR CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= AIR REG AND AIR REG <= l_shipmode_max@1 END) AND CASE WHEN l_shipinstruct_null_count@6 = l_shipinstruct_row_count@7 THEN false ELSE l_shipinstruct_min@4 <= DELIVER IN PERSON AND DELIVER IN PERSON <= l_shipinstruct_max@5 END AND (CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(800),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(1800),11,2 END OR CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(2000),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(3000),11,2 END OR CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(3000),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(4000),11,2 END), required_guarantees=[l_shipmode in (AIR, AIR REG), l_shipinstruct in (DELIVER IN PERSON)] RaySQL Plan =========== -Query Stage #0 (1 -> 1): -ProjectionExec: expr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@0 as revenue] - AggregateExec: mode=Single, gby=[], 
aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] +Query Stage #0 (2 -> 4): +ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: (p_brand@1 = Brand#21 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("SM CASE") }, Literal { value: Utf8("SM BOX") }, Literal { value: Utf8("SM PACK") }, Literal { value: Utf8("SM PKG") }]) AND p_size@2 <= 5 OR p_brand@1 = Brand#13 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("MED BAG") }, Literal { value: Utf8("MED BOX") }, Literal { value: Utf8("MED PKG") }, Literal { value: Utf8("MED PACK") }]) AND p_size@2 <= 10 OR p_brand@1 = Brand#52 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("LG CASE") }, Literal { value: Utf8("LG BOX") }, Literal { value: Utf8("LG PACK") }, Literal { value: Utf8("LG PKG") }]) AND p_size@2 <= 15) AND p_size@2 >= 1 + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_brand, p_size, p_container], predicate=p_size@5 >= 1 AND (p_brand@3 = Brand#21 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("SM CASE") }, Literal { value: Utf8("SM BOX") }, Literal { value: Utf8("SM PACK") }, Literal { value: Utf8("SM PKG") }]) AND p_size@5 <= 5 OR p_brand@3 = Brand#13 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("MED BAG") }, Literal { value: Utf8("MED BOX") }, Literal { value: Utf8("MED PKG") }, Literal { value: Utf8("MED PACK") }]) AND p_size@5 <= 10 OR p_brand@3 = Brand#52 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("LG CASE") }, Literal { value: Utf8("LG BOX") }, Literal { value: Utf8("LG PACK") }, Literal { value: Utf8("LG PKG") }]) AND p_size@5 <= 15), pruning_predicate=CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_max@0 >= 1 END AND (CASE 
WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#21 AND Brand#21 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM CASE AND SM CASE <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM BOX AND SM BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM PACK AND SM PACK <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM PKG AND SM PKG <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 5 END OR CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#13 AND Brand#13 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED BAG AND MED BAG <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED BOX AND MED BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED PKG AND MED PKG <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED PACK AND MED PACK <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 10 END OR CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#52 AND Brand#52 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG CASE AND LG CASE <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE 
p_container_min@7 <= LG BOX AND LG BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG PACK AND LG PACK <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG PKG AND LG PKG <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 15 END), required_guarantees=[] + +Query Stage #1 (4 -> 4): +ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "l_partkey", index: 0 }], 4)) + ProjectionExec: expr=[l_partkey@0 as l_partkey, l_quantity@1 as l_quantity, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: (l_quantity@1 >= Some(800),11,2 AND l_quantity@1 <= Some(1800),11,2 OR l_quantity@1 >= Some(2000),11,2 AND l_quantity@1 <= Some(3000),11,2 OR l_quantity@1 >= Some(3000),11,2 AND l_quantity@1 <= Some(4000),11,2) AND (l_shipmode@5 = AIR OR l_shipmode@5 = AIR REG) AND l_shipinstruct@4 = DELIVER IN PERSON + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_partkey, l_quantity, l_extendedprice, l_discount, l_shipinstruct, l_shipmode], predicate=(l_shipmode@14 = AIR OR l_shipmode@14 = AIR REG) AND l_shipinstruct@13 = DELIVER IN PERSON AND (l_quantity@4 >= Some(800),11,2 AND l_quantity@4 <= Some(1800),11,2 OR l_quantity@4 >= Some(2000),11,2 AND l_quantity@4 <= Some(3000),11,2 OR 
l_quantity@4 >= Some(3000),11,2 AND l_quantity@4 <= Some(4000),11,2), pruning_predicate=(CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= AIR AND AIR <= l_shipmode_max@1 END OR CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= AIR REG AND AIR REG <= l_shipmode_max@1 END) AND CASE WHEN l_shipinstruct_null_count@6 = l_shipinstruct_row_count@7 THEN false ELSE l_shipinstruct_min@4 <= DELIVER IN PERSON AND DELIVER IN PERSON <= l_shipinstruct_max@5 END AND (CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(800),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(1800),11,2 END OR CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(2000),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(3000),11,2 END OR CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(3000),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(4000),11,2 END), required_guarantees=[l_shipmode in (AIR, AIR REG), l_shipinstruct in (DELIVER IN PERSON)] + +Query Stage #2 (4 -> 1): +ShuffleWriterExec(stage_id=2, output_partitioning=Hash([], 4)) + AggregateExec: mode=Partial, gby=[], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, l_partkey@0)], filter=p_brand@1 = Brand#21 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("SM CASE") }, Literal { value: Utf8("SM BOX") }, Literal { value: Utf8("SM PACK") }, Literal { value: Utf8("SM PKG") }]) AND l_quantity@0 >= Some(800),11,2 AND l_quantity@0 <= Some(1800),11,2 AND p_size@2 <= 5 OR p_brand@1 = 
Brand#13 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("MED BAG") }, Literal { value: Utf8("MED BOX") }, Literal { value: Utf8("MED PKG") }, Literal { value: Utf8("MED PACK") }]) AND l_quantity@0 >= Some(2000),11,2 AND l_quantity@0 <= Some(3000),11,2 AND p_size@2 <= 10 OR p_brand@1 = Brand#52 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("LG CASE") }, Literal { value: Utf8("LG BOX") }, Literal { value: Utf8("LG PACK") }, Literal { value: Utf8("LG PKG") }]) AND l_quantity@0 >= Some(3000),11,2 AND l_quantity@0 <= Some(4000),11,2 AND p_size@2 <= 15, projection=[l_extendedprice@6, l_discount@7] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, l_partkey@0)], filter=p_brand@1 = Brand#21 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("SM CASE") }, Literal { value: Utf8("SM BOX") }, Literal { value: Utf8("SM PACK") }, Literal { value: Utf8("SM PKG") }]) AND l_quantity@0 >= Some(800),11,2 AND l_quantity@0 <= Some(1800),11,2 AND p_size@2 <= 5 OR p_brand@1 = Brand#13 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("MED BAG") }, Literal { value: Utf8("MED BOX") }, Literal { value: Utf8("MED PKG") }, Literal { value: Utf8("MED PACK") }]) AND l_quantity@0 >= Some(2000),11,2 AND l_quantity@0 <= Some(3000),11,2 AND p_size@2 <= 10 OR p_brand@1 = Brand#52 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("LG CASE") }, Literal { value: Utf8("LG BOX") }, Literal { value: Utf8("LG PACK") }, Literal { value: Utf8("LG PKG") }]) AND l_quantity@0 >= Some(3000),11,2 AND l_quantity@0 <= Some(4000),11,2 AND p_size@2 <= 15, projection=[l_extendedprice@6, l_discount@7] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) CoalesceBatchesExec: target_batch_size=8192 - FilterExec: (p_brand@1 = Brand#21 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("SM CASE") }, Literal { value: Utf8("SM BOX") }, Literal { value: Utf8("SM PACK") }, Literal 
{ value: Utf8("SM PKG") }]) AND p_size@2 <= 5 OR p_brand@1 = Brand#13 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("MED BAG") }, Literal { value: Utf8("MED BOX") }, Literal { value: Utf8("MED PKG") }, Literal { value: Utf8("MED PACK") }]) AND p_size@2 <= 10 OR p_brand@1 = Brand#52 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("LG CASE") }, Literal { value: Utf8("LG BOX") }, Literal { value: Utf8("LG PACK") }, Literal { value: Utf8("LG PKG") }]) AND p_size@2 <= 15) AND p_size@2 >= 1 - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_brand, p_size, p_container], predicate=p_size@5 >= 1 AND (p_brand@3 = Brand#21 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("SM CASE") }, Literal { value: Utf8("SM BOX") }, Literal { value: Utf8("SM PACK") }, Literal { value: Utf8("SM PKG") }]) AND p_size@5 <= 5 OR p_brand@3 = Brand#13 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("MED BAG") }, Literal { value: Utf8("MED BOX") }, Literal { value: Utf8("MED PKG") }, Literal { value: Utf8("MED PACK") }]) AND p_size@5 <= 10 OR p_brand@3 = Brand#52 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("LG CASE") }, Literal { value: Utf8("LG BOX") }, Literal { value: Utf8("LG PACK") }, Literal { value: Utf8("LG PKG") }]) AND p_size@5 <= 15), pruning_predicate=CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_max@0 >= 1 END AND (CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#21 AND Brand#21 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM CASE AND SM CASE <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM BOX AND SM BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM PACK 
AND SM PACK <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM PKG AND SM PKG <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 5 END OR CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#13 AND Brand#13 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED BAG AND MED BAG <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED BOX AND MED BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED PKG AND MED PKG <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED PACK AND MED PACK <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 10 END OR CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#52 AND Brand#52 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG CASE AND LG CASE <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG BOX AND LG BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG PACK AND LG PACK <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG PKG AND LG PKG <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 15 END), required_guarantees=[] - ProjectionExec: expr=[l_partkey@0 as l_partkey, l_quantity@1 
as l_quantity, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: (l_quantity@1 >= Some(800),11,2 AND l_quantity@1 <= Some(1800),11,2 OR l_quantity@1 >= Some(2000),11,2 AND l_quantity@1 <= Some(3000),11,2 OR l_quantity@1 >= Some(3000),11,2 AND l_quantity@1 <= Some(4000),11,2) AND (l_shipmode@5 = AIR OR l_shipmode@5 = AIR REG) AND l_shipinstruct@4 = DELIVER IN PERSON - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_partkey, l_quantity, l_extendedprice, l_discount, l_shipinstruct, l_shipmode], predicate=(l_shipmode@14 = AIR OR l_shipmode@14 = AIR REG) AND l_shipinstruct@13 = DELIVER IN PERSON AND (l_quantity@4 >= Some(800),11,2 AND l_quantity@4 <= Some(1800),11,2 OR l_quantity@4 >= Some(2000),11,2 AND l_quantity@4 <= Some(3000),11,2 OR l_quantity@4 >= Some(3000),11,2 AND l_quantity@4 <= Some(4000),11,2), pruning_predicate=(CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= AIR AND AIR <= l_shipmode_max@1 END OR CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= AIR REG AND AIR REG <= l_shipmode_max@1 END) AND CASE WHEN l_shipinstruct_null_count@6 = l_shipinstruct_row_count@7 THEN false ELSE l_shipinstruct_min@4 <= DELIVER IN PERSON AND DELIVER IN PERSON <= l_shipinstruct_max@5 END AND (CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(800),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(1800),11,2 END OR CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(2000),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(3000),11,2 END OR CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN 
false ELSE l_quantity_max@8 >= Some(3000),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(4000),11,2 END), required_guarantees=[l_shipmode in (AIR REG, AIR), l_shipinstruct in (DELIVER IN PERSON)] + ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "l_partkey", index: 0 }], 4)) + +Query Stage #3 (1 -> 1): +ProjectionExec: expr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@0 as revenue] + AggregateExec: mode=Final, gby=[], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] + CoalescePartitionsExec + ShuffleReaderExec(stage_id=2, input_partitioning=Hash([], 4)) diff --git a/testdata/expected-plans/q2.txt b/testdata/expected-plans/q2.txt index e36f6ba..fc6e279 100644 --- a/testdata/expected-plans/q2.txt +++ b/testdata/expected-plans/q2.txt @@ -41,93 +41,227 @@ Limit: skip=0, fetch=100 DataFusion Physical Plan ======================== -SortExec: TopK(fetch=100), expr=[s_acctbal@0 DESC,n_name@2 ASC NULLS LAST,s_name@1 ASC NULLS LAST,p_partkey@3 ASC NULLS LAST], preserve_partitioning=[false] - ProjectionExec: expr=[s_acctbal@5 as s_acctbal, s_name@2 as s_name, n_name@7 as n_name, p_partkey@0 as p_partkey, p_mfgr@1 as p_mfgr, s_address@3 as s_address, s_phone@4 as s_phone, s_comment@6 as s_comment] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, ps_partkey@1), (ps_supplycost@7, min(partsupp.ps_supplycost)@0)], projection=[p_partkey@0, p_mfgr@1, s_name@2, s_address@3, s_phone@4, s_acctbal@5, s_comment@6, n_name@8] +GlobalLimitExec: skip=0, fetch=100 + SortPreservingMergeExec: [s_acctbal@0 DESC,n_name@2 ASC NULLS LAST,s_name@1 ASC NULLS LAST,p_partkey@3 ASC NULLS LAST], fetch=100 + SortExec: TopK(fetch=100), expr=[s_acctbal@0 DESC,n_name@2 ASC NULLS LAST,s_name@1 ASC NULLS LAST,p_partkey@3 ASC NULLS LAST], preserve_partitioning=[true] + ProjectionExec: expr=[s_acctbal@5 as s_acctbal, 
s_name@2 as s_name, n_name@7 as n_name, p_partkey@0 as p_partkey, p_mfgr@1 as p_mfgr, s_address@3 as s_address, s_phone@4 as s_phone, s_comment@6 as s_comment] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(r_regionkey@0, n_regionkey@9)], projection=[p_partkey@1, p_mfgr@2, s_name@3, s_address@4, s_phone@5, s_acctbal@6, s_comment@7, ps_supplycost@8, n_name@9] - ProjectionExec: expr=[r_regionkey@0 as r_regionkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: r_name@1 = ASIA - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = ASIA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= ASIA AND ASIA <= r_name_max@1 END, required_guarantees=[r_name in (ASIA)] - ProjectionExec: expr=[p_partkey@2 as p_partkey, p_mfgr@3 as p_mfgr, s_name@4 as s_name, s_address@5 as s_address, s_phone@6 as s_phone, s_acctbal@7 as s_acctbal, s_comment@8 as s_comment, ps_supplycost@9 as ps_supplycost, n_name@0 as n_name, n_regionkey@1 as n_regionkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@4)], projection=[n_name@1, n_regionkey@2, p_partkey@3, p_mfgr@4, s_name@5, s_address@6, s_phone@8, s_acctbal@9, s_comment@10, ps_supplycost@11] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name, n_regionkey] - ProjectionExec: expr=[p_partkey@6 as p_partkey, p_mfgr@7 as p_mfgr, s_name@0 as s_name, s_address@1 as s_address, s_nationkey@2 as s_nationkey, s_phone@3 as s_phone, s_acctbal@4 as s_acctbal, s_comment@5 as s_comment, ps_supplycost@8 as ps_supplycost] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, ps_partkey@1), (ps_supplycost@7, min(partsupp.ps_supplycost)@0)], 
projection=[p_partkey@0, p_mfgr@1, s_name@2, s_address@3, s_phone@4, s_acctbal@5, s_comment@6, n_name@8] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([p_partkey@0, ps_supplycost@7], 4), input_partitions=4 + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(r_regionkey@0, n_regionkey@9)], projection=[p_partkey@1, p_mfgr@2, s_name@3, s_address@4, s_phone@5, s_acctbal@6, s_comment@7, ps_supplycost@8, n_name@9] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([r_regionkey@0], 4), input_partitions=4 + RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 + ProjectionExec: expr=[r_regionkey@0 as r_regionkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: r_name@1 = ASIA + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet/part1.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = ASIA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= ASIA AND ASIA <= r_name_max@1 END, required_guarantees=[r_name in (ASIA)] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@2)], projection=[s_name@1, s_address@2, s_nationkey@3, s_phone@4, s_acctbal@5, s_comment@6, p_partkey@7, p_mfgr@8, ps_supplycost@10] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, ps_partkey@0)], projection=[p_partkey@0, p_mfgr@1, ps_suppkey@3, ps_supplycost@4] - ProjectionExec: expr=[p_partkey@0 as p_partkey, p_mfgr@1 as p_mfgr] + RepartitionExec: partitioning=Hash([n_regionkey@9], 4), input_partitions=4 + ProjectionExec: 
expr=[p_partkey@2 as p_partkey, p_mfgr@3 as p_mfgr, s_name@4 as s_name, s_address@5 as s_address, s_phone@6 as s_phone, s_acctbal@7 as s_acctbal, s_comment@8 as s_comment, ps_supplycost@9 as ps_supplycost, n_name@0 as n_name, n_regionkey@1 as n_regionkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@4)], projection=[n_name@1, n_regionkey@2, p_partkey@3, p_mfgr@4, s_name@5, s_address@6, s_phone@8, s_acctbal@9, s_comment@10, ps_supplycost@11] CoalesceBatchesExec: target_batch_size=8192 - FilterExec: p_size@3 = 48 AND p_type@2 LIKE %TIN - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_mfgr, p_type, p_size], predicate=p_size@5 = 48 AND p_type@4 LIKE %TIN, pruning_predicate=CASE WHEN p_size_null_count@2 = p_size_row_count@3 THEN false ELSE p_size_min@0 <= 48 AND 48 <= p_size_max@1 END, required_guarantees=[p_size in (48)] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet:0..41092331]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] - ProjectionExec: expr=[min(partsupp.ps_supplycost)@1 as min(partsupp.ps_supplycost), ps_partkey@0 as ps_partkey] - AggregateExec: mode=Single, gby=[ps_partkey@0 as ps_partkey], aggr=[min(partsupp.ps_supplycost)] + RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=1 + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name, n_regionkey] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([s_nationkey@4], 4), input_partitions=4 + ProjectionExec: expr=[p_partkey@6 as p_partkey, p_mfgr@7 as p_mfgr, s_name@0 as s_name, s_address@1 as s_address, s_nationkey@2 as s_nationkey, s_phone@3 as s_phone, s_acctbal@4 as s_acctbal, s_comment@5 as s_comment, ps_supplycost@8 as 
ps_supplycost] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@2)], projection=[s_name@1, s_address@2, s_nationkey@3, s_phone@4, s_acctbal@5, s_comment@6, p_partkey@7, p_mfgr@8, ps_supplycost@10] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=4 + RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([ps_suppkey@2], 4), input_partitions=4 + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, ps_partkey@0)], projection=[p_partkey@0, p_mfgr@1, ps_suppkey@3, ps_supplycost@4] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([p_partkey@0], 4), input_partitions=4 + ProjectionExec: expr=[p_partkey@0 as p_partkey, p_mfgr@1 as p_mfgr] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: p_size@3 = 48 AND p_type@2 LIKE %TIN + RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_mfgr, p_type, p_size], predicate=p_size@5 = 48 AND p_type@4 LIKE %TIN, pruning_predicate=CASE WHEN p_size_null_count@2 = p_size_row_count@3 THEN false ELSE p_size_min@0 <= 48 AND 48 <= p_size_max@1 END, required_guarantees=[p_size in (48)] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: 
partitioning=Hash([ps_partkey@0], 4), input_partitions=4 + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10445614], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:10445614..20890506, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..722], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:722..10446336], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:10446336..20891947]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(r_regionkey@0, n_regionkey@2)], projection=[ps_partkey@1, ps_supplycost@2] - ProjectionExec: expr=[r_regionkey@0 as r_regionkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: r_name@1 = ASIA - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = ASIA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= ASIA AND ASIA <= r_name_max@1 END, required_guarantees=[r_name in (ASIA)] - ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_supplycost@2 as ps_supplycost, n_regionkey@0 as n_regionkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[n_regionkey@1, ps_partkey@2, ps_supplycost@3] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_regionkey] - ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_supplycost@2 as ps_supplycost, s_nationkey@0 as s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, 
ps_suppkey@1)], projection=[s_nationkey@1, ps_partkey@2, ps_supplycost@4] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet:0..41092331]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] + RepartitionExec: partitioning=Hash([ps_partkey@1, min(partsupp.ps_supplycost)@0], 4), input_partitions=4 + ProjectionExec: expr=[min(partsupp.ps_supplycost)@1 as min(partsupp.ps_supplycost), ps_partkey@0 as ps_partkey] + AggregateExec: mode=FinalPartitioned, gby=[ps_partkey@0 as ps_partkey], aggr=[min(partsupp.ps_supplycost)] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([ps_partkey@0], 4), input_partitions=4 + AggregateExec: mode=Partial, gby=[ps_partkey@0 as ps_partkey], aggr=[min(partsupp.ps_supplycost)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(r_regionkey@0, n_regionkey@2)], projection=[ps_partkey@1, ps_supplycost@2] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([r_regionkey@0], 4), input_partitions=4 + RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 + ProjectionExec: expr=[r_regionkey@0 as r_regionkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: r_name@1 = ASIA + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet/part1.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = ASIA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= ASIA AND ASIA <= r_name_max@1 END, required_guarantees=[r_name in (ASIA)] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([n_regionkey@2], 4), input_partitions=4 + ProjectionExec: expr=[ps_partkey@1 as ps_partkey, 
ps_supplycost@2 as ps_supplycost, n_regionkey@0 as n_regionkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[n_regionkey@1, ps_partkey@2, ps_supplycost@3] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=1 + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_regionkey] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([s_nationkey@2], 4), input_partitions=4 + ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_supplycost@2 as ps_supplycost, s_nationkey@0 as s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@1)], projection=[s_nationkey@1, ps_partkey@2, ps_supplycost@4] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=4 + RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([ps_suppkey@1], 4), input_partitions=4 + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10445614], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:10445614..20890506, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..722], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:722..10446336], 
[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:10446336..20891947]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] RaySQL Plan =========== -Query Stage #0 (1 -> 1): -SortExec: TopK(fetch=100), expr=[s_acctbal@0 DESC,n_name@2 ASC NULLS LAST,s_name@1 ASC NULLS LAST,p_partkey@3 ASC NULLS LAST], preserve_partitioning=[false] - ProjectionExec: expr=[s_acctbal@5 as s_acctbal, s_name@2 as s_name, n_name@7 as n_name, p_partkey@0 as p_partkey, p_mfgr@1 as p_mfgr, s_address@3 as s_address, s_phone@4 as s_phone, s_comment@6 as s_comment] +Query Stage #0 (1 -> 4): +ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "r_regionkey", index: 0 }], 4)) + ProjectionExec: expr=[r_regionkey@0 as r_regionkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: r_name@1 = ASIA + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet/part1.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = ASIA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= ASIA AND ASIA <= r_name_max@1 END, required_guarantees=[r_name in (ASIA)] + +Query Stage #1 (1 -> 4): +ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name, n_regionkey] + +Query Stage #2 (2 -> 4): +ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment] + +Query Stage #3 (2 -> 4): +ShuffleWriterExec(stage_id=3, 
output_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) + ProjectionExec: expr=[p_partkey@0 as p_partkey, p_mfgr@1 as p_mfgr] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, ps_partkey@1), (ps_supplycost@7, min(partsupp.ps_supplycost)@0)], projection=[p_partkey@0, p_mfgr@1, s_name@2, s_address@3, s_phone@4, s_acctbal@5, s_comment@6, n_name@8] + FilterExec: p_size@3 = 48 AND p_type@2 LIKE %TIN + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_mfgr, p_type, p_size], predicate=p_size@5 = 48 AND p_type@4 LIKE %TIN, pruning_predicate=CASE WHEN p_size_null_count@2 = p_size_row_count@3 THEN false ELSE p_size_min@0 <= 48 AND 48 <= p_size_max@1 END, required_guarantees=[p_size in (48)] + +Query Stage #4 (4 -> 4): +ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 4)) + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10445614], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:10445614..20890506, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..722], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:722..10446336], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:10446336..20891947]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] + +Query Stage #5 (4 -> 4): +ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "ps_suppkey", index: 2 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, ps_partkey@0)], projection=[p_partkey@0, p_mfgr@1, ps_suppkey@3, ps_supplycost@4] + 
CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=4, input_partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 4)) + +Query Stage #6 (4 -> 4): +ShuffleWriterExec(stage_id=6, output_partitioning=Hash([Column { name: "s_nationkey", index: 4 }], 4)) + ProjectionExec: expr=[p_partkey@6 as p_partkey, p_mfgr@7 as p_mfgr, s_name@0 as s_name, s_address@1 as s_address, s_nationkey@2 as s_nationkey, s_phone@3 as s_phone, s_acctbal@4 as s_acctbal, s_comment@5 as s_comment, ps_supplycost@8 as ps_supplycost] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@2)], projection=[s_name@1, s_address@2, s_nationkey@3, s_phone@4, s_acctbal@5, s_comment@6, p_partkey@7, p_mfgr@8, ps_supplycost@10] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(r_regionkey@0, n_regionkey@9)], projection=[p_partkey@1, p_mfgr@2, s_name@3, s_address@4, s_phone@5, s_acctbal@6, s_comment@7, ps_supplycost@8, n_name@9] - ProjectionExec: expr=[r_regionkey@0 as r_regionkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: r_name@1 = ASIA - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = ASIA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= ASIA AND ASIA <= r_name_max@1 END, required_guarantees=[r_name in (ASIA)] - ProjectionExec: expr=[p_partkey@2 as p_partkey, p_mfgr@3 as p_mfgr, s_name@4 as s_name, s_address@5 as s_address, s_phone@6 as s_phone, s_acctbal@7 as s_acctbal, s_comment@8 as s_comment, ps_supplycost@9 as ps_supplycost, n_name@0 as n_name, n_regionkey@1 as n_regionkey] - CoalesceBatchesExec: target_batch_size=8192 - 
HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@4)], projection=[n_name@1, n_regionkey@2, p_partkey@3, p_mfgr@4, s_name@5, s_address@6, s_phone@8, s_acctbal@9, s_comment@10, ps_supplycost@11] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name, n_regionkey] - ProjectionExec: expr=[p_partkey@6 as p_partkey, p_mfgr@7 as p_mfgr, s_name@0 as s_name, s_address@1 as s_address, s_nationkey@2 as s_nationkey, s_phone@3 as s_phone, s_acctbal@4 as s_acctbal, s_comment@5 as s_comment, ps_supplycost@8 as ps_supplycost] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@2)], projection=[s_name@1, s_address@2, s_nationkey@3, s_phone@4, s_acctbal@5, s_comment@6, p_partkey@7, p_mfgr@8, ps_supplycost@10] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, ps_partkey@0)], projection=[p_partkey@0, p_mfgr@1, ps_suppkey@3, ps_supplycost@4] - ProjectionExec: expr=[p_partkey@0 as p_partkey, p_mfgr@1 as p_mfgr] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: p_size@3 = 48 AND p_type@2 LIKE %TIN - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_mfgr, p_type, p_size], predicate=p_size@5 = 48 AND p_type@4 LIKE %TIN, pruning_predicate=CASE WHEN p_size_null_count@2 = p_size_row_count@3 THEN false ELSE p_size_min@0 <= 48 AND 48 <= p_size_max@1 END, required_guarantees=[p_size in (48)] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet:0..41092331]]}, projection=[ps_partkey, ps_suppkey, 
ps_supplycost] - ProjectionExec: expr=[min(partsupp.ps_supplycost)@1 as min(partsupp.ps_supplycost), ps_partkey@0 as ps_partkey] - AggregateExec: mode=Single, gby=[ps_partkey@0 as ps_partkey], aggr=[min(partsupp.ps_supplycost)] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(r_regionkey@0, n_regionkey@2)], projection=[ps_partkey@1, ps_supplycost@2] - ProjectionExec: expr=[r_regionkey@0 as r_regionkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: r_name@1 = ASIA - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = ASIA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= ASIA AND ASIA <= r_name_max@1 END, required_guarantees=[r_name in (ASIA)] - ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_supplycost@2 as ps_supplycost, n_regionkey@0 as n_regionkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[n_regionkey@1, ps_partkey@2, ps_supplycost@3] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_regionkey] - ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_supplycost@2 as ps_supplycost, s_nationkey@0 as s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@1)], projection=[s_nationkey@1, ps_partkey@2, ps_supplycost@4] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet:0..41092331]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] + ShuffleReaderExec(stage_id=2, 
input_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=5, input_partitioning=Hash([Column { name: "ps_suppkey", index: 2 }], 4)) + +Query Stage #7 (4 -> 4): +ShuffleWriterExec(stage_id=7, output_partitioning=Hash([Column { name: "n_regionkey", index: 9 }], 4)) + ProjectionExec: expr=[p_partkey@2 as p_partkey, p_mfgr@3 as p_mfgr, s_name@4 as s_name, s_address@5 as s_address, s_phone@6 as s_phone, s_acctbal@7 as s_acctbal, s_comment@8 as s_comment, ps_supplycost@9 as ps_supplycost, n_name@0 as n_name, n_regionkey@1 as n_regionkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@4)], projection=[n_name@1, n_regionkey@2, p_partkey@3, p_mfgr@4, s_name@5, s_address@6, s_phone@8, s_acctbal@9, s_comment@10, ps_supplycost@11] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=6, input_partitioning=Hash([Column { name: "s_nationkey", index: 4 }], 4)) + +Query Stage #8 (4 -> 4): +ShuffleWriterExec(stage_id=8, output_partitioning=Hash([Column { name: "p_partkey", index: 0 }, Column { name: "ps_supplycost", index: 7 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(r_regionkey@0, n_regionkey@9)], projection=[p_partkey@1, p_mfgr@2, s_name@3, s_address@4, s_phone@5, s_acctbal@6, s_comment@7, ps_supplycost@8, n_name@9] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "r_regionkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=7, input_partitioning=Hash([Column { name: "n_regionkey", index: 9 }], 4)) + +Query Stage #9 (1 -> 4): +ShuffleWriterExec(stage_id=9, 
output_partitioning=Hash([Column { name: "r_regionkey", index: 0 }], 4)) + ProjectionExec: expr=[r_regionkey@0 as r_regionkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: r_name@1 = ASIA + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet/part1.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = ASIA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= ASIA AND ASIA <= r_name_max@1 END, required_guarantees=[r_name in (ASIA)] + +Query Stage #10 (1 -> 4): +ShuffleWriterExec(stage_id=10, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_regionkey] + +Query Stage #11 (2 -> 4): +ShuffleWriterExec(stage_id=11, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] + +Query Stage #12 (4 -> 4): +ShuffleWriterExec(stage_id=12, output_partitioning=Hash([Column { name: "ps_suppkey", index: 1 }], 4)) + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10445614], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:10445614..20890506, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..722], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:722..10446336], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:10446336..20891947]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] + +Query Stage #13 (4 -> 4): 
+ShuffleWriterExec(stage_id=13, output_partitioning=Hash([Column { name: "s_nationkey", index: 2 }], 4)) + ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_supplycost@2 as ps_supplycost, s_nationkey@0 as s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@1)], projection=[s_nationkey@1, ps_partkey@2, ps_supplycost@4] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=11, input_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=12, input_partitioning=Hash([Column { name: "ps_suppkey", index: 1 }], 4)) + +Query Stage #14 (4 -> 4): +ShuffleWriterExec(stage_id=14, output_partitioning=Hash([Column { name: "n_regionkey", index: 2 }], 4)) + ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_supplycost@2 as ps_supplycost, n_regionkey@0 as n_regionkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[n_regionkey@1, ps_partkey@2, ps_supplycost@3] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=10, input_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=13, input_partitioning=Hash([Column { name: "s_nationkey", index: 2 }], 4)) + +Query Stage #15 (4 -> 4): +ShuffleWriterExec(stage_id=15, output_partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 4)) + AggregateExec: mode=Partial, gby=[ps_partkey@0 as ps_partkey], aggr=[min(partsupp.ps_supplycost)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(r_regionkey@0, n_regionkey@2)], projection=[ps_partkey@1, ps_supplycost@2] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=9, input_partitioning=Hash([Column { name: "r_regionkey", 
index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=14, input_partitioning=Hash([Column { name: "n_regionkey", index: 2 }], 4)) + +Query Stage #16 (4 -> 4): +ShuffleWriterExec(stage_id=16, output_partitioning=Hash([Column { name: "ps_partkey", index: 1 }, Column { name: "min(partsupp.ps_supplycost)", index: 0 }], 4)) + ProjectionExec: expr=[min(partsupp.ps_supplycost)@1 as min(partsupp.ps_supplycost), ps_partkey@0 as ps_partkey] + AggregateExec: mode=FinalPartitioned, gby=[ps_partkey@0 as ps_partkey], aggr=[min(partsupp.ps_supplycost)] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=15, input_partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 4)) + +Query Stage #17 (4 -> 4): +ShuffleWriterExec(stage_id=17, output_partitioning=Hash([Column { name: "p_partkey", index: 3 }], 4)) + SortExec: TopK(fetch=100), expr=[s_acctbal@0 DESC,n_name@2 ASC NULLS LAST,s_name@1 ASC NULLS LAST,p_partkey@3 ASC NULLS LAST], preserve_partitioning=[true] + ProjectionExec: expr=[s_acctbal@5 as s_acctbal, s_name@2 as s_name, n_name@7 as n_name, p_partkey@0 as p_partkey, p_mfgr@1 as p_mfgr, s_address@3 as s_address, s_phone@4 as s_phone, s_comment@6 as s_comment] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, ps_partkey@1), (ps_supplycost@7, min(partsupp.ps_supplycost)@0)], projection=[p_partkey@0, p_mfgr@1, s_name@2, s_address@3, s_phone@4, s_acctbal@5, s_comment@6, n_name@8] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=8, input_partitioning=Hash([Column { name: "p_partkey", index: 0 }, Column { name: "ps_supplycost", index: 7 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=16, input_partitioning=Hash([Column { name: "ps_partkey", index: 1 }, Column { name: "min(partsupp.ps_supplycost)", index: 0 }], 4)) + +Query Stage #18 (1 -> 1): +GlobalLimitExec: skip=0, fetch=100 + 
SortPreservingMergeExec: [s_acctbal@0 DESC,n_name@2 ASC NULLS LAST,s_name@1 ASC NULLS LAST,p_partkey@3 ASC NULLS LAST], fetch=100 + ShuffleReaderExec(stage_id=17, input_partitioning=Hash([Column { name: "p_partkey", index: 3 }], 4)) diff --git a/testdata/expected-plans/q20.txt b/testdata/expected-plans/q20.txt index f2a87f6..d977fac 100644 --- a/testdata/expected-plans/q20.txt +++ b/testdata/expected-plans/q20.txt @@ -29,59 +29,126 @@ Sort: supplier.s_name ASC NULLS LAST DataFusion Physical Plan ======================== -SortExec: expr=[s_name@0 ASC NULLS LAST], preserve_partitioning=[false] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=LeftSemi, on=[(s_suppkey@0, ps_suppkey@0)], projection=[s_name@1, s_address@2] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@3)], projection=[s_suppkey@1, s_name@2, s_address@3] - ProjectionExec: expr=[n_nationkey@0 as n_nationkey] +SortPreservingMergeExec: [s_name@0 ASC NULLS LAST] + SortExec: expr=[s_name@0 ASC NULLS LAST], preserve_partitioning=[true] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=LeftSemi, on=[(s_suppkey@0, ps_suppkey@0)], projection=[s_name@1, s_address@2] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=4 CoalesceBatchesExec: target_batch_size=8192 - FilterExec: n_name@1 = KENYA - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = KENYA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= KENYA AND KENYA <= n_name_max@1 END, required_guarantees=[n_name in (KENYA)] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_name, s_address, 
s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(ps_partkey@0, l_partkey@1), (ps_suppkey@1, l_suppkey@2)], filter=CAST(ps_availqty@0 AS Float64) > Float64(0.5) * sum(lineitem.l_quantity)@1, projection=[ps_suppkey@1] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=RightSemi, on=[(p_partkey@0, ps_partkey@0)] - ProjectionExec: expr=[p_partkey@0 as p_partkey] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@3)], projection=[s_suppkey@1, s_name@2, s_address@3] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=4 + RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 + ProjectionExec: expr=[n_nationkey@0 as n_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: n_name@1 = KENYA + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = KENYA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= KENYA AND KENYA <= n_name_max@1 END, required_guarantees=[n_name in (KENYA)] CoalesceBatchesExec: target_batch_size=8192 - FilterExec: p_name@1 LIKE blanched% - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_name], predicate=p_name@1 LIKE blanched% - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet:0..41092331]]}, projection=[ps_partkey, ps_suppkey, ps_availqty] - ProjectionExec: expr=[0.5 * CAST(sum(lineitem.l_quantity)@2 AS Float64) as Float64(0.5) * sum(lineitem.l_quantity), l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey] - AggregateExec: mode=Single, gby=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey], 
aggr=[sum(lineitem.l_quantity)] - ProjectionExec: expr=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey, l_quantity@2 as l_quantity] + RepartitionExec: partitioning=Hash([s_nationkey@3], 4), input_partitions=4 + RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_name, s_address, s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([ps_suppkey@0], 4), input_partitions=4 + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(ps_partkey@0, l_partkey@1), (ps_suppkey@1, l_suppkey@2)], filter=CAST(ps_availqty@0 AS Float64) > Float64(0.5) * sum(lineitem.l_quantity)@1, projection=[ps_suppkey@1] CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_shipdate@3 >= 1993-01-01 AND l_shipdate@3 < 1994-01-01 - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_partkey, l_suppkey, l_quantity, l_shipdate], predicate=l_shipdate@10 >= 1993-01-01 AND l_shipdate@10 < 1994-01-01, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1993-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1994-01-01 END, required_guarantees=[] + RepartitionExec: partitioning=Hash([ps_partkey@0, ps_suppkey@1], 4), input_partitions=4 + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=RightSemi, on=[(p_partkey@0, ps_partkey@0)] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([p_partkey@0], 4), input_partitions=4 + ProjectionExec: expr=[p_partkey@0 as p_partkey] + CoalesceBatchesExec: 
target_batch_size=8192 + FilterExec: p_name@1 LIKE blanched% + RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_name], predicate=p_name@1 LIKE blanched% + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([ps_partkey@0], 4), input_partitions=4 + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10445614], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:10445614..20890506, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..722], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:722..10446336], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:10446336..20891947]]}, projection=[ps_partkey, ps_suppkey, ps_availqty] + ProjectionExec: expr=[0.5 * CAST(sum(lineitem.l_quantity)@2 AS Float64) as Float64(0.5) * sum(lineitem.l_quantity), l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey] + AggregateExec: mode=FinalPartitioned, gby=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey], aggr=[sum(lineitem.l_quantity)] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([l_partkey@0, l_suppkey@1], 4), input_partitions=4 + AggregateExec: mode=Partial, gby=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey], aggr=[sum(lineitem.l_quantity)] + ProjectionExec: expr=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey, l_quantity@2 as l_quantity] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: l_shipdate@3 >= 1993-01-01 AND l_shipdate@3 < 1994-01-01 + ParquetExec: file_groups={4 groups: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_partkey, l_suppkey, l_quantity, l_shipdate], predicate=l_shipdate@10 >= 1993-01-01 AND l_shipdate@10 < 1994-01-01, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1993-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1994-01-01 END, required_guarantees=[] RaySQL Plan =========== -Query Stage #0 (1 -> 1): -SortExec: expr=[s_name@0 ASC NULLS LAST], preserve_partitioning=[false] +Query Stage #0 (1 -> 4): +ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) + ProjectionExec: expr=[n_nationkey@0 as n_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: n_name@1 = KENYA + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = KENYA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= KENYA AND KENYA <= n_name_max@1 END, required_guarantees=[n_name in (KENYA)] + +Query Stage #1 (2 -> 4): +ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "s_nationkey", index: 3 }], 4)) + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, 
projection=[s_suppkey, s_name, s_address, s_nationkey] + +Query Stage #2 (4 -> 4): +ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=LeftSemi, on=[(s_suppkey@0, ps_suppkey@0)], projection=[s_name@1, s_address@2] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@3)], projection=[s_suppkey@1, s_name@2, s_address@3] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@3)], projection=[s_suppkey@1, s_name@2, s_address@3] - ProjectionExec: expr=[n_nationkey@0 as n_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: n_name@1 = KENYA - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = KENYA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= KENYA AND KENYA <= n_name_max@1 END, required_guarantees=[n_name in (KENYA)] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_name, s_address, s_nationkey] + ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "s_nationkey", index: 3 }], 4)) + +Query Stage #3 (2 -> 4): +ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) + ProjectionExec: expr=[p_partkey@0 as p_partkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: p_name@1 LIKE blanched% + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet], 
[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_name], predicate=p_name@1 LIKE blanched% + +Query Stage #4 (4 -> 4): +ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 4)) + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10445614], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:10445614..20890506, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..722], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:722..10446336], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:10446336..20891947]]}, projection=[ps_partkey, ps_suppkey, ps_availqty] + +Query Stage #5 (4 -> 4): +ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "ps_partkey", index: 0 }, Column { name: "ps_suppkey", index: 1 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=RightSemi, on=[(p_partkey@0, ps_partkey@0)] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=4, input_partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 4)) + +Query Stage #6 (4 -> 4): +ShuffleWriterExec(stage_id=6, output_partitioning=Hash([Column { name: "l_partkey", index: 0 }, Column { name: "l_suppkey", index: 1 }], 4)) + AggregateExec: mode=Partial, gby=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey], aggr=[sum(lineitem.l_quantity)] + ProjectionExec: expr=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey, l_quantity@2 as l_quantity] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: l_shipdate@3 >= 1993-01-01 AND l_shipdate@3 < 1994-01-01 + ParquetExec: 
file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_partkey, l_suppkey, l_quantity, l_shipdate], predicate=l_shipdate@10 >= 1993-01-01 AND l_shipdate@10 < 1994-01-01, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1993-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1994-01-01 END, required_guarantees=[] + +Query Stage #7 (4 -> 4): +ShuffleWriterExec(stage_id=7, output_partitioning=Hash([Column { name: "ps_suppkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(ps_partkey@0, l_partkey@1), (ps_suppkey@1, l_suppkey@2)], filter=CAST(ps_availqty@0 AS Float64) > Float64(0.5) * sum(lineitem.l_quantity)@1, projection=[ps_suppkey@1] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(ps_partkey@0, l_partkey@1), (ps_suppkey@1, l_suppkey@2)], filter=CAST(ps_availqty@0 AS Float64) > Float64(0.5) * sum(lineitem.l_quantity)@1, projection=[ps_suppkey@1] + ShuffleReaderExec(stage_id=5, input_partitioning=Hash([Column { name: "ps_partkey", index: 0 }, Column { name: "ps_suppkey", index: 1 }], 4)) + ProjectionExec: expr=[0.5 * CAST(sum(lineitem.l_quantity)@2 AS Float64) as Float64(0.5) * sum(lineitem.l_quantity), l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey] + AggregateExec: mode=FinalPartitioned, gby=[l_partkey@0 as l_partkey, l_suppkey@1 as 
l_suppkey], aggr=[sum(lineitem.l_quantity)] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=RightSemi, on=[(p_partkey@0, ps_partkey@0)] - ProjectionExec: expr=[p_partkey@0 as p_partkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: p_name@1 LIKE blanched% - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_name], predicate=p_name@1 LIKE blanched% - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet:0..41092331]]}, projection=[ps_partkey, ps_suppkey, ps_availqty] - ProjectionExec: expr=[0.5 * CAST(sum(lineitem.l_quantity)@2 AS Float64) as Float64(0.5) * sum(lineitem.l_quantity), l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey] - AggregateExec: mode=Single, gby=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey], aggr=[sum(lineitem.l_quantity)] - ProjectionExec: expr=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey, l_quantity@2 as l_quantity] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_shipdate@3 >= 1993-01-01 AND l_shipdate@3 < 1994-01-01 - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_partkey, l_suppkey, l_quantity, l_shipdate], predicate=l_shipdate@10 >= 1993-01-01 AND l_shipdate@10 < 1994-01-01, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1993-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1994-01-01 END, required_guarantees=[] + ShuffleReaderExec(stage_id=6, input_partitioning=Hash([Column { name: "l_partkey", index: 0 }, Column { name: "l_suppkey", index: 1 }], 4)) + +Query Stage #8 (4 -> 1): +ShuffleWriterExec(stage_id=8, output_partitioning=Hash([], 4)) + SortExec: expr=[s_name@0 ASC NULLS LAST], preserve_partitioning=[true] + 
CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=LeftSemi, on=[(s_suppkey@0, ps_suppkey@0)], projection=[s_name@1, s_address@2] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=7, input_partitioning=Hash([Column { name: "ps_suppkey", index: 0 }], 4)) + +Query Stage #9 (4 -> 1): +SortPreservingMergeExec: [s_name@0 ASC NULLS LAST] + ShuffleReaderExec(stage_id=8, input_partitioning=Hash([], 4)) diff --git a/testdata/expected-plans/q21.txt b/testdata/expected-plans/q21.txt index 1e7e3f5..b5e8809 100644 --- a/testdata/expected-plans/q21.txt +++ b/testdata/expected-plans/q21.txt @@ -37,71 +37,153 @@ Limit: skip=0, fetch=100 DataFusion Physical Plan ======================== -SortExec: TopK(fetch=100), expr=[numwait@1 DESC,s_name@0 ASC NULLS LAST], preserve_partitioning=[false] - ProjectionExec: expr=[s_name@0 as s_name, count(*)@1 as numwait] - AggregateExec: mode=Single, gby=[s_name@0 as s_name], aggr=[count(*)] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=LeftAnti, on=[(l_orderkey@1, l_orderkey@0)], filter=l_suppkey@1 != l_suppkey@0, projection=[s_name@0] +GlobalLimitExec: skip=0, fetch=100 + SortPreservingMergeExec: [numwait@1 DESC,s_name@0 ASC NULLS LAST], fetch=100 + SortExec: TopK(fetch=100), expr=[numwait@1 DESC,s_name@0 ASC NULLS LAST], preserve_partitioning=[true] + ProjectionExec: expr=[s_name@0 as s_name, count(*)@1 as numwait] + AggregateExec: mode=FinalPartitioned, gby=[s_name@0 as s_name], aggr=[count(*)] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=LeftSemi, on=[(l_orderkey@1, l_orderkey@0)], filter=l_suppkey@1 != l_suppkey@0 - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@1)], 
projection=[s_name@1, l_orderkey@3, l_suppkey@4] - ProjectionExec: expr=[n_nationkey@0 as n_nationkey] + RepartitionExec: partitioning=Hash([s_name@0], 4), input_partitions=4 + AggregateExec: mode=Partial, gby=[s_name@0 as s_name], aggr=[count(*)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=LeftAnti, on=[(l_orderkey@1, l_orderkey@0)], filter=l_suppkey@1 != l_suppkey@0, projection=[s_name@0] CoalesceBatchesExec: target_batch_size=8192 - FilterExec: n_name@1 = ARGENTINA - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ARGENTINA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ARGENTINA AND ARGENTINA <= n_name_max@1 END, required_guarantees=[n_name in (ARGENTINA)] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(o_orderkey@0, l_orderkey@2)], projection=[s_name@1, s_nationkey@2, l_orderkey@3, l_suppkey@4] - ProjectionExec: expr=[o_orderkey@0 as o_orderkey] + HashJoinExec: mode=Partitioned, join_type=LeftSemi, on=[(l_orderkey@1, l_orderkey@0)], filter=l_suppkey@1 != l_suppkey@0 CoalesceBatchesExec: target_batch_size=8192 - FilterExec: o_orderstatus@1 = F - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_orderstatus], predicate=o_orderstatus@2 = F, pruning_predicate=CASE WHEN o_orderstatus_null_count@2 = o_orderstatus_row_count@3 THEN false ELSE o_orderstatus_min@0 <= F AND F <= o_orderstatus_max@1 END, required_guarantees=[o_orderstatus in (F)] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1)], projection=[s_name@1, s_nationkey@2, l_orderkey@3, l_suppkey@4] - ParquetExec: file_groups={1 group: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_name, s_nationkey] - ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_suppkey@1 as l_suppkey] + RepartitionExec: partitioning=Hash([l_orderkey@1], 4), input_partitions=4 CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_receiptdate@3 > l_commitdate@2 - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_suppkey] - ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_suppkey@1 as l_suppkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_receiptdate@3 > l_commitdate@2 - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@1)], projection=[s_name@1, l_orderkey@3, l_suppkey@4] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=4 + RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 + ProjectionExec: expr=[n_nationkey@0 as n_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: n_name@1 = ARGENTINA + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ARGENTINA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ARGENTINA AND ARGENTINA <= n_name_max@1 END, required_guarantees=[n_name in 
(ARGENTINA)] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([s_nationkey@1], 4), input_partitions=4 + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@0, l_orderkey@2)], projection=[s_name@1, s_nationkey@2, l_orderkey@3, l_suppkey@4] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([o_orderkey@0], 4), input_partitions=4 + ProjectionExec: expr=[o_orderkey@0 as o_orderkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: o_orderstatus@1 = F + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13634779], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13634779..27269558], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27269558..27275584, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13628753], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13628753..27263530]]}, projection=[o_orderkey, o_orderstatus], predicate=o_orderstatus@2 = F, pruning_predicate=CASE WHEN o_orderstatus_null_count@2 = o_orderstatus_row_count@3 THEN false ELSE o_orderstatus_min@0 <= F AND F <= o_orderstatus_max@1 END, required_guarantees=[o_orderstatus in (F)] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([l_orderkey@2], 4), input_partitions=4 + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1)], projection=[s_name@1, s_nationkey@2, l_orderkey@3, l_suppkey@4] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=4 + RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 + ParquetExec: file_groups={2 groups: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_name, s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([l_suppkey@1], 4), input_partitions=4 + ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_suppkey@1 as l_suppkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: l_receiptdate@3 > l_commitdate@2 + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4 + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_orderkey, l_suppkey] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4 + 
ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_suppkey@1 as l_suppkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: l_receiptdate@3 > l_commitdate@2 + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 RaySQL Plan =========== -Query Stage #0 (1 -> 1): -SortExec: TopK(fetch=100), expr=[numwait@1 DESC,s_name@0 ASC NULLS LAST], preserve_partitioning=[false] - ProjectionExec: expr=[s_name@0 as s_name, count(*)@1 as numwait] - AggregateExec: mode=Single, gby=[s_name@0 as s_name], aggr=[count(*)] +Query Stage #0 (1 -> 4): +ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) + ProjectionExec: expr=[n_nationkey@0 as n_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: n_name@1 = ARGENTINA + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ARGENTINA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ARGENTINA AND ARGENTINA <= n_name_max@1 END, required_guarantees=[n_name in (ARGENTINA)] + +Query Stage #1 (4 -> 4): +ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) + ProjectionExec: expr=[o_orderkey@0 as o_orderkey] + CoalesceBatchesExec: target_batch_size=8192 + 
FilterExec: o_orderstatus@1 = F + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13634779], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13634779..27269558], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27269558..27275584, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13628753], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13628753..27263530]]}, projection=[o_orderkey, o_orderstatus], predicate=o_orderstatus@2 = F, pruning_predicate=CASE WHEN o_orderstatus_null_count@2 = o_orderstatus_row_count@3 THEN false ELSE o_orderstatus_min@0 <= F AND F <= o_orderstatus_max@1 END, required_guarantees=[o_orderstatus in (F)] + +Query Stage #2 (2 -> 4): +ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_name, s_nationkey] + +Query Stage #3 (4 -> 4): +ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "l_suppkey", index: 1 }], 4)) + ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_suppkey@1 as l_suppkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: l_receiptdate@3 > l_commitdate@2 + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], 
[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 + +Query Stage #4 (4 -> 4): +ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "l_orderkey", index: 2 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1)], projection=[s_name@1, s_nationkey@2, l_orderkey@3, l_suppkey@4] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=LeftAnti, on=[(l_orderkey@1, l_orderkey@0)], filter=l_suppkey@1 != l_suppkey@0, projection=[s_name@0] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=LeftSemi, on=[(l_orderkey@1, l_orderkey@0)], filter=l_suppkey@1 != l_suppkey@0 - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@1)], projection=[s_name@1, l_orderkey@3, l_suppkey@4] - ProjectionExec: expr=[n_nationkey@0 as n_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: n_name@1 = ARGENTINA - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ARGENTINA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ARGENTINA AND ARGENTINA <= n_name_max@1 END, required_guarantees=[n_name in (ARGENTINA)] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(o_orderkey@0, l_orderkey@2)], projection=[s_name@1, s_nationkey@2, l_orderkey@3, l_suppkey@4] - ProjectionExec: expr=[o_orderkey@0 as o_orderkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: o_orderstatus@1 = F - ParquetExec: file_groups={1 group: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_orderstatus], predicate=o_orderstatus@2 = F, pruning_predicate=CASE WHEN o_orderstatus_null_count@2 = o_orderstatus_row_count@3 THEN false ELSE o_orderstatus_min@0 <= F AND F <= o_orderstatus_max@1 END, required_guarantees=[o_orderstatus in (F)] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1)], projection=[s_name@1, s_nationkey@2, l_orderkey@3, l_suppkey@4] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_name, s_nationkey] - ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_suppkey@1 as l_suppkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_receiptdate@3 > l_commitdate@2 - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_suppkey] - ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_suppkey@1 as l_suppkey] + ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "l_suppkey", index: 1 }], 4)) + +Query Stage #5 (4 -> 4): +ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "s_nationkey", index: 1 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@0, l_orderkey@2)], projection=[s_name@1, s_nationkey@2, l_orderkey@3, l_suppkey@4] + CoalesceBatchesExec: target_batch_size=8192 + 
ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=4, input_partitioning=Hash([Column { name: "l_orderkey", index: 2 }], 4)) + +Query Stage #6 (4 -> 4): +ShuffleWriterExec(stage_id=6, output_partitioning=Hash([Column { name: "l_orderkey", index: 1 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@1)], projection=[s_name@1, l_orderkey@3, l_suppkey@4] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=5, input_partitioning=Hash([Column { name: "s_nationkey", index: 1 }], 4)) + +Query Stage #7 (4 -> 4): +ShuffleWriterExec(stage_id=7, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_orderkey, l_suppkey] + +Query Stage #8 (4 -> 4): +ShuffleWriterExec(stage_id=8, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) + ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_suppkey@1 as l_suppkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: l_receiptdate@3 > l_commitdate@2 + ParquetExec: file_groups={4 groups: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 + +Query Stage #9 (4 -> 4): +ShuffleWriterExec(stage_id=9, output_partitioning=Hash([Column { name: "s_name", index: 0 }], 4)) + AggregateExec: mode=Partial, gby=[s_name@0 as s_name], aggr=[count(*)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=LeftAnti, on=[(l_orderkey@1, l_orderkey@0)], filter=l_suppkey@1 != l_suppkey@0, projection=[s_name@0] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=LeftSemi, on=[(l_orderkey@1, l_orderkey@0)], filter=l_suppkey@1 != l_suppkey@0 CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_receiptdate@3 > l_commitdate@2 - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 + ShuffleReaderExec(stage_id=6, input_partitioning=Hash([Column { name: "l_orderkey", index: 1 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=7, input_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=8, input_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) + +Query Stage #10 (4 -> 4): +ShuffleWriterExec(stage_id=10, output_partitioning=Hash([Column 
{ name: "s_name", index: 0 }], 4)) + SortExec: TopK(fetch=100), expr=[numwait@1 DESC,s_name@0 ASC NULLS LAST], preserve_partitioning=[true] + ProjectionExec: expr=[s_name@0 as s_name, count(*)@1 as numwait] + AggregateExec: mode=FinalPartitioned, gby=[s_name@0 as s_name], aggr=[count(*)] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=9, input_partitioning=Hash([Column { name: "s_name", index: 0 }], 4)) + +Query Stage #11 (1 -> 1): +GlobalLimitExec: skip=0, fetch=100 + SortPreservingMergeExec: [numwait@1 DESC,s_name@0 ASC NULLS LAST], fetch=100 + ShuffleReaderExec(stage_id=10, input_partitioning=Hash([Column { name: "s_name", index: 0 }], 4)) diff --git a/testdata/expected-plans/q22.txt b/testdata/expected-plans/q22.txt index 3236874..564ce9c 100644 --- a/testdata/expected-plans/q22.txt +++ b/testdata/expected-plans/q22.txt @@ -22,41 +22,78 @@ Sort: custsale.cntrycode ASC NULLS LAST DataFusion Physical Plan ======================== -SortExec: expr=[cntrycode@0 ASC NULLS LAST], preserve_partitioning=[false] - ProjectionExec: expr=[cntrycode@0 as cntrycode, count(*)@1 as numcust, sum(custsale.c_acctbal)@2 as totacctbal] - AggregateExec: mode=Single, gby=[cntrycode@0 as cntrycode], aggr=[count(*), sum(custsale.c_acctbal)] - ProjectionExec: expr=[substr(c_phone@1, 1, 2) as cntrycode, c_acctbal@2 as c_acctbal] - NestedLoopJoinExec: join_type=Inner, filter=CAST(c_acctbal@0 AS Decimal128(15, 6)) > avg(customer.c_acctbal)@1 - AggregateExec: mode=Single, gby=[], aggr=[avg(customer.c_acctbal)] - ProjectionExec: expr=[c_acctbal@1 as c_acctbal] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: c_acctbal@1 > Some(0),11,2 AND Use substr(c_phone@0, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) - ParquetExec: file_groups={1 group: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13212491]]}, projection=[c_phone, c_acctbal], predicate=c_acctbal@5 > Some(0),11,2 AND Use substr(c_phone@4, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) AND c_acctbal@5 > Some(0),11,2, pruning_predicate=CASE WHEN c_acctbal_null_count@1 = c_acctbal_row_count@2 THEN false ELSE c_acctbal_max@0 > Some(0),11,2 END AND CASE WHEN c_acctbal_null_count@1 = c_acctbal_row_count@2 THEN false ELSE c_acctbal_max@0 > Some(0),11,2 END, required_guarantees=[] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=LeftAnti, on=[(c_custkey@0, o_custkey@0)], projection=[c_phone@1, c_acctbal@2] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: Use substr(c_phone@1, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13212491]]}, projection=[c_custkey, c_phone, c_acctbal], predicate=Use substr(c_phone@4, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_custkey] +SortPreservingMergeExec: [cntrycode@0 ASC NULLS LAST] + SortExec: expr=[cntrycode@0 ASC NULLS LAST], preserve_partitioning=[true] + ProjectionExec: expr=[cntrycode@0 as cntrycode, count(*)@1 as numcust, 
sum(custsale.c_acctbal)@2 as totacctbal] + AggregateExec: mode=FinalPartitioned, gby=[cntrycode@0 as cntrycode], aggr=[count(*), sum(custsale.c_acctbal)] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([cntrycode@0], 4), input_partitions=4 + AggregateExec: mode=Partial, gby=[cntrycode@0 as cntrycode], aggr=[count(*), sum(custsale.c_acctbal)] + ProjectionExec: expr=[substr(c_phone@1, 1, 2) as cntrycode, c_acctbal@2 as c_acctbal] + NestedLoopJoinExec: join_type=Inner, filter=CAST(c_acctbal@0 AS Decimal128(15, 6)) > avg(customer.c_acctbal)@1 + AggregateExec: mode=Final, gby=[], aggr=[avg(customer.c_acctbal)] + CoalescePartitionsExec + AggregateExec: mode=Partial, gby=[], aggr=[avg(customer.c_acctbal)] + ProjectionExec: expr=[c_acctbal@1 as c_acctbal] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: c_acctbal@1 > Some(0),11,2 AND Use substr(c_phone@0, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..3390424], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:3390424..6780848], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6780848..6782476, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..3388796], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:3388796..6779220]]}, projection=[c_phone, c_acctbal], predicate=c_acctbal@5 > Some(0),11,2 AND Use substr(c_phone@4, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: 
Utf8("13") }]) AND c_acctbal@5 > Some(0),11,2, pruning_predicate=CASE WHEN c_acctbal_null_count@1 = c_acctbal_row_count@2 THEN false ELSE c_acctbal_max@0 > Some(0),11,2 END AND CASE WHEN c_acctbal_null_count@1 = c_acctbal_row_count@2 THEN false ELSE c_acctbal_max@0 > Some(0),11,2 END, required_guarantees=[] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=LeftAnti, on=[(c_custkey@0, o_custkey@0)], projection=[c_phone@1, c_acctbal@2] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([c_custkey@0], 4), input_partitions=4 + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: Use substr(c_phone@1, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..3390424], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:3390424..6780848], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6780848..6782476, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..3388796], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:3388796..6779220]]}, projection=[c_custkey, c_phone, c_acctbal], predicate=Use substr(c_phone@4, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([o_custkey@0], 4), input_partitions=4 + ParquetExec: file_groups={4 groups: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13634779], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13634779..27269558], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27269558..27275584, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13628753], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13628753..27263530]]}, projection=[o_custkey] RaySQL Plan =========== -Query Stage #0 (1 -> 1): -SortExec: expr=[cntrycode@0 ASC NULLS LAST], preserve_partitioning=[false] - ProjectionExec: expr=[cntrycode@0 as cntrycode, count(*)@1 as numcust, sum(custsale.c_acctbal)@2 as totacctbal] - AggregateExec: mode=Single, gby=[cntrycode@0 as cntrycode], aggr=[count(*), sum(custsale.c_acctbal)] - ProjectionExec: expr=[substr(c_phone@1, 1, 2) as cntrycode, c_acctbal@2 as c_acctbal] - NestedLoopJoinExec: join_type=Inner, filter=CAST(c_acctbal@0 AS Decimal128(15, 6)) > avg(customer.c_acctbal)@1 - AggregateExec: mode=Single, gby=[], aggr=[avg(customer.c_acctbal)] - ProjectionExec: expr=[c_acctbal@1 as c_acctbal] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: c_acctbal@1 > Some(0),11,2 AND Use substr(c_phone@0, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13212491]]}, projection=[c_phone, c_acctbal], predicate=c_acctbal@5 > Some(0),11,2 AND Use substr(c_phone@4, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) AND 
c_acctbal@5 > Some(0),11,2, pruning_predicate=CASE WHEN c_acctbal_null_count@1 = c_acctbal_row_count@2 THEN false ELSE c_acctbal_max@0 > Some(0),11,2 END AND CASE WHEN c_acctbal_null_count@1 = c_acctbal_row_count@2 THEN false ELSE c_acctbal_max@0 > Some(0),11,2 END, required_guarantees=[] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=LeftAnti, on=[(c_custkey@0, o_custkey@0)], projection=[c_phone@1, c_acctbal@2] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: Use substr(c_phone@1, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13212491]]}, projection=[c_custkey, c_phone, c_acctbal], predicate=Use substr(c_phone@4, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_custkey] +Query Stage #0 (4 -> 1): +ShuffleWriterExec(stage_id=0, output_partitioning=UnknownPartitioning(4)) + AggregateExec: mode=Partial, gby=[], aggr=[avg(customer.c_acctbal)] + ProjectionExec: expr=[c_acctbal@1 as c_acctbal] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: c_acctbal@1 > Some(0),11,2 AND Use substr(c_phone@0, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) + ParquetExec: file_groups={4 groups: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..3390424], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:3390424..6780848], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6780848..6782476, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..3388796], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:3388796..6779220]]}, projection=[c_phone, c_acctbal], predicate=c_acctbal@5 > Some(0),11,2 AND Use substr(c_phone@4, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) AND c_acctbal@5 > Some(0),11,2, pruning_predicate=CASE WHEN c_acctbal_null_count@1 = c_acctbal_row_count@2 THEN false ELSE c_acctbal_max@0 > Some(0),11,2 END AND CASE WHEN c_acctbal_null_count@1 = c_acctbal_row_count@2 THEN false ELSE c_acctbal_max@0 > Some(0),11,2 END, required_guarantees=[] + +Query Stage #1 (4 -> 4): +ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: Use substr(c_phone@1, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..3390424], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:3390424..6780848], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6780848..6782476, 
home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..3388796], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:3388796..6779220]]}, projection=[c_custkey, c_phone, c_acctbal], predicate=Use substr(c_phone@4, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) + +Query Stage #2 (4 -> 4): +ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "o_custkey", index: 0 }], 4)) + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13634779], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13634779..27269558], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27269558..27275584, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13628753], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13628753..27263530]]}, projection=[o_custkey] + +Query Stage #3 (4 -> 4): +ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "cntrycode", index: 0 }], 4)) + AggregateExec: mode=Partial, gby=[cntrycode@0 as cntrycode], aggr=[count(*), sum(custsale.c_acctbal)] + ProjectionExec: expr=[substr(c_phone@1, 1, 2) as cntrycode, c_acctbal@2 as c_acctbal] + NestedLoopJoinExec: join_type=Inner, filter=CAST(c_acctbal@0 AS Decimal128(15, 6)) > avg(customer.c_acctbal)@1 + AggregateExec: mode=Final, gby=[], aggr=[avg(customer.c_acctbal)] + CoalescePartitionsExec + ShuffleReaderExec(stage_id=0, input_partitioning=UnknownPartitioning(4)) + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=LeftAnti, on=[(c_custkey@0, o_custkey@0)], projection=[c_phone@1, c_acctbal@2] + CoalesceBatchesExec: 
target_batch_size=8192 + ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "o_custkey", index: 0 }], 4)) + +Query Stage #4 (4 -> 4): +ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "cntrycode", index: 0 }], 4)) + SortExec: expr=[cntrycode@0 ASC NULLS LAST], preserve_partitioning=[true] + ProjectionExec: expr=[cntrycode@0 as cntrycode, count(*)@1 as numcust, sum(custsale.c_acctbal)@2 as totacctbal] + AggregateExec: mode=FinalPartitioned, gby=[cntrycode@0 as cntrycode], aggr=[count(*), sum(custsale.c_acctbal)] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "cntrycode", index: 0 }], 4)) + +Query Stage #5 (4 -> 1): +SortPreservingMergeExec: [cntrycode@0 ASC NULLS LAST] + ShuffleReaderExec(stage_id=4, input_partitioning=Hash([Column { name: "cntrycode", index: 0 }], 4)) diff --git a/testdata/expected-plans/q3.txt b/testdata/expected-plans/q3.txt index 5e528af..790fc2d 100644 --- a/testdata/expected-plans/q3.txt +++ b/testdata/expected-plans/q3.txt @@ -21,45 +21,90 @@ Limit: skip=0, fetch=10 DataFusion Physical Plan ======================== -SortExec: TopK(fetch=10), expr=[revenue@1 DESC,o_orderdate@2 ASC NULLS LAST], preserve_partitioning=[false] - ProjectionExec: expr=[l_orderkey@0 as l_orderkey, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@3 as revenue, o_orderdate@1 as o_orderdate, o_shippriority@2 as o_shippriority] - AggregateExec: mode=Single, gby=[l_orderkey@2 as l_orderkey, o_orderdate@0 as o_orderdate, o_shippriority@1 as o_shippriority], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_orderdate@1, o_shippriority@2, 
l_orderkey@3, l_extendedprice@4, l_discount@5] +GlobalLimitExec: skip=0, fetch=10 + SortPreservingMergeExec: [revenue@1 DESC,o_orderdate@2 ASC NULLS LAST], fetch=10 + SortExec: TopK(fetch=10), expr=[revenue@1 DESC,o_orderdate@2 ASC NULLS LAST], preserve_partitioning=[true] + ProjectionExec: expr=[l_orderkey@0 as l_orderkey, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@3 as revenue, o_orderdate@1 as o_orderdate, o_shippriority@2 as o_shippriority] + AggregateExec: mode=FinalPartitioned, gby=[l_orderkey@0 as l_orderkey, o_orderdate@1 as o_orderdate, o_shippriority@2 as o_shippriority], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[o_orderkey@1, o_orderdate@3, o_shippriority@4] - ProjectionExec: expr=[c_custkey@0 as c_custkey] + RepartitionExec: partitioning=Hash([l_orderkey@0, o_orderdate@1, o_shippriority@2], 4), input_partitions=4 + AggregateExec: mode=Partial, gby=[l_orderkey@2 as l_orderkey, o_orderdate@0 as o_orderdate, o_shippriority@1 as o_shippriority], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] CoalesceBatchesExec: target_batch_size=8192 - FilterExec: c_mktsegment@1 = BUILDING - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13212491]]}, projection=[c_custkey, c_mktsegment], predicate=c_mktsegment@6 = BUILDING, pruning_predicate=CASE WHEN c_mktsegment_null_count@2 = c_mktsegment_row_count@3 THEN false ELSE c_mktsegment_min@0 <= BUILDING AND BUILDING <= c_mktsegment_max@1 END, required_guarantees=[c_mktsegment in (BUILDING)] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: o_orderdate@2 < 1995-03-15 - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_custkey, o_orderdate, o_shippriority], 
predicate=o_orderdate@4 < 1995-03-15, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@0 < 1995-03-15 END, required_guarantees=[] - ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_shipdate@3 > 1995-03-15 - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 > 1995-03-15, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 > 1995-03-15 END, required_guarantees=[] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_orderdate@1, o_shippriority@2, l_orderkey@3, l_extendedprice@4, l_discount@5] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([o_orderkey@0], 4), input_partitions=4 + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[o_orderkey@1, o_orderdate@3, o_shippriority@4] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([c_custkey@0], 4), input_partitions=4 + ProjectionExec: expr=[c_custkey@0 as c_custkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: c_mktsegment@1 = BUILDING + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..3390424], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:3390424..6780848], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6780848..6782476, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..3388796], 
[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:3388796..6779220]]}, projection=[c_custkey, c_mktsegment], predicate=c_mktsegment@6 = BUILDING, pruning_predicate=CASE WHEN c_mktsegment_null_count@2 = c_mktsegment_row_count@3 THEN false ELSE c_mktsegment_min@0 <= BUILDING AND BUILDING <= c_mktsegment_max@1 END, required_guarantees=[c_mktsegment in (BUILDING)] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([o_custkey@1], 4), input_partitions=4 + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: o_orderdate@2 < 1995-03-15 + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13634779], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13634779..27269558], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27269558..27275584, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13628753], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13628753..27263530]]}, projection=[o_orderkey, o_custkey, o_orderdate, o_shippriority], predicate=o_orderdate@4 < 1995-03-15, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@0 < 1995-03-15 END, required_guarantees=[] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4 + ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: l_shipdate@3 > 1995-03-15 + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, 
home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_orderkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 > 1995-03-15, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 > 1995-03-15 END, required_guarantees=[] RaySQL Plan =========== -Query Stage #0 (1 -> 1): -SortExec: TopK(fetch=10), expr=[revenue@1 DESC,o_orderdate@2 ASC NULLS LAST], preserve_partitioning=[false] - ProjectionExec: expr=[l_orderkey@0 as l_orderkey, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@3 as revenue, o_orderdate@1 as o_orderdate, o_shippriority@2 as o_shippriority] - AggregateExec: mode=Single, gby=[l_orderkey@2 as l_orderkey, o_orderdate@0 as o_orderdate, o_shippriority@1 as o_shippriority], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] +Query Stage #0 (4 -> 4): +ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) + ProjectionExec: expr=[c_custkey@0 as c_custkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: c_mktsegment@1 = BUILDING + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..3390424], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:3390424..6780848], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6780848..6782476, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..3388796], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:3388796..6779220]]}, projection=[c_custkey, c_mktsegment], predicate=c_mktsegment@6 = BUILDING, 
pruning_predicate=CASE WHEN c_mktsegment_null_count@2 = c_mktsegment_row_count@3 THEN false ELSE c_mktsegment_min@0 <= BUILDING AND BUILDING <= c_mktsegment_max@1 END, required_guarantees=[c_mktsegment in (BUILDING)] + +Query Stage #1 (4 -> 4): +ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "o_custkey", index: 1 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: o_orderdate@2 < 1995-03-15 + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13634779], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13634779..27269558], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27269558..27275584, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13628753], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13628753..27263530]]}, projection=[o_orderkey, o_custkey, o_orderdate, o_shippriority], predicate=o_orderdate@4 < 1995-03-15, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@0 < 1995-03-15 END, required_guarantees=[] + +Query Stage #2 (4 -> 4): +ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[o_orderkey@1, o_orderdate@3, o_shippriority@4] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_orderdate@1, o_shippriority@2, l_orderkey@3, l_extendedprice@4, l_discount@5] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[o_orderkey@1, o_orderdate@3, o_shippriority@4] - ProjectionExec: expr=[c_custkey@0 
as c_custkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: c_mktsegment@1 = BUILDING - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13212491]]}, projection=[c_custkey, c_mktsegment], predicate=c_mktsegment@6 = BUILDING, pruning_predicate=CASE WHEN c_mktsegment_null_count@2 = c_mktsegment_row_count@3 THEN false ELSE c_mktsegment_min@0 <= BUILDING AND BUILDING <= c_mktsegment_max@1 END, required_guarantees=[c_mktsegment in (BUILDING)] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: o_orderdate@2 < 1995-03-15 - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_custkey, o_orderdate, o_shippriority], predicate=o_orderdate@4 < 1995-03-15, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@0 < 1995-03-15 END, required_guarantees=[] - ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_shipdate@3 > 1995-03-15 - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 > 1995-03-15, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 > 1995-03-15 END, required_guarantees=[] + ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "o_custkey", index: 1 }], 4)) + +Query Stage #3 (4 -> 4): +ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) + ProjectionExec: expr=[l_orderkey@0 as l_orderkey, 
l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: l_shipdate@3 > 1995-03-15 + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_orderkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 > 1995-03-15, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 > 1995-03-15 END, required_guarantees=[] + +Query Stage #4 (4 -> 4): +ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }, Column { name: "o_orderdate", index: 1 }, Column { name: "o_shippriority", index: 2 }], 4)) + AggregateExec: mode=Partial, gby=[l_orderkey@2 as l_orderkey, o_orderdate@0 as o_orderdate, o_shippriority@1 as o_shippriority], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_orderdate@1, o_shippriority@2, l_orderkey@3, l_extendedprice@4, l_discount@5] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) + +Query Stage #5 (4 -> 4): +ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: 
"l_orderkey", index: 0 }, Column { name: "o_orderdate", index: 2 }, Column { name: "o_shippriority", index: 3 }], 4)) + SortExec: TopK(fetch=10), expr=[revenue@1 DESC,o_orderdate@2 ASC NULLS LAST], preserve_partitioning=[true] + ProjectionExec: expr=[l_orderkey@0 as l_orderkey, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@3 as revenue, o_orderdate@1 as o_orderdate, o_shippriority@2 as o_shippriority] + AggregateExec: mode=FinalPartitioned, gby=[l_orderkey@0 as l_orderkey, o_orderdate@1 as o_orderdate, o_shippriority@2 as o_shippriority], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=4, input_partitioning=Hash([Column { name: "l_orderkey", index: 0 }, Column { name: "o_orderdate", index: 1 }, Column { name: "o_shippriority", index: 2 }], 4)) + +Query Stage #6 (1 -> 1): +GlobalLimitExec: skip=0, fetch=10 + SortPreservingMergeExec: [revenue@1 DESC,o_orderdate@2 ASC NULLS LAST], fetch=10 + ShuffleReaderExec(stage_id=5, input_partitioning=Hash([Column { name: "l_orderkey", index: 0 }, Column { name: "o_orderdate", index: 2 }, Column { name: "o_shippriority", index: 3 }], 4)) diff --git a/testdata/expected-plans/q4.txt b/testdata/expected-plans/q4.txt index 3a99247..c830fbd 100644 --- a/testdata/expected-plans/q4.txt +++ b/testdata/expected-plans/q4.txt @@ -17,35 +17,64 @@ Sort: orders.o_orderpriority ASC NULLS LAST DataFusion Physical Plan ======================== -SortExec: expr=[o_orderpriority@0 ASC NULLS LAST], preserve_partitioning=[false] - ProjectionExec: expr=[o_orderpriority@0 as o_orderpriority, count(*)@1 as order_count] - AggregateExec: mode=Single, gby=[o_orderpriority@0 as o_orderpriority], aggr=[count(*)] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=LeftSemi, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_orderpriority@1] - ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_orderpriority@2 as 
o_orderpriority] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: o_orderdate@1 >= 1995-04-01 AND o_orderdate@1 < 1995-07-01 - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_orderdate, o_orderpriority], predicate=o_orderdate@4 >= 1995-04-01 AND o_orderdate@4 < 1995-07-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1995-04-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1995-07-01 END, required_guarantees=[] - ProjectionExec: expr=[l_orderkey@0 as l_orderkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_receiptdate@2 > l_commitdate@1 - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 +SortPreservingMergeExec: [o_orderpriority@0 ASC NULLS LAST] + SortExec: expr=[o_orderpriority@0 ASC NULLS LAST], preserve_partitioning=[true] + ProjectionExec: expr=[o_orderpriority@0 as o_orderpriority, count(*)@1 as order_count] + AggregateExec: mode=FinalPartitioned, gby=[o_orderpriority@0 as o_orderpriority], aggr=[count(*)] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([o_orderpriority@0], 4), input_partitions=4 + AggregateExec: mode=Partial, gby=[o_orderpriority@0 as o_orderpriority], aggr=[count(*)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=LeftSemi, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_orderpriority@1] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([o_orderkey@0], 4), input_partitions=4 + ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_orderpriority@2 as o_orderpriority] + CoalesceBatchesExec: target_batch_size=8192 + 
FilterExec: o_orderdate@1 >= 1995-04-01 AND o_orderdate@1 < 1995-07-01 + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13634779], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13634779..27269558], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27269558..27275584, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13628753], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13628753..27263530]]}, projection=[o_orderkey, o_orderdate, o_orderpriority], predicate=o_orderdate@4 >= 1995-04-01 AND o_orderdate@4 < 1995-07-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1995-04-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1995-07-01 END, required_guarantees=[] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4 + ProjectionExec: expr=[l_orderkey@0 as l_orderkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: l_receiptdate@2 > l_commitdate@1 + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_orderkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 RaySQL Plan =========== -Query Stage #0 (1 -> 1): -SortExec: 
expr=[o_orderpriority@0 ASC NULLS LAST], preserve_partitioning=[false] - ProjectionExec: expr=[o_orderpriority@0 as o_orderpriority, count(*)@1 as order_count] - AggregateExec: mode=Single, gby=[o_orderpriority@0 as o_orderpriority], aggr=[count(*)] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=LeftSemi, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_orderpriority@1] - ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_orderpriority@2 as o_orderpriority] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: o_orderdate@1 >= 1995-04-01 AND o_orderdate@1 < 1995-07-01 - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_orderdate, o_orderpriority], predicate=o_orderdate@4 >= 1995-04-01 AND o_orderdate@4 < 1995-07-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1995-04-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1995-07-01 END, required_guarantees=[] - ProjectionExec: expr=[l_orderkey@0 as l_orderkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_receiptdate@2 > l_commitdate@1 - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 +Query Stage #0 (4 -> 4): +ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) + ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_orderpriority@2 as o_orderpriority] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: o_orderdate@1 >= 1995-04-01 AND o_orderdate@1 < 1995-07-01 + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13634779], 
[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13634779..27269558], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27269558..27275584, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13628753], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13628753..27263530]]}, projection=[o_orderkey, o_orderdate, o_orderpriority], predicate=o_orderdate@4 >= 1995-04-01 AND o_orderdate@4 < 1995-07-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1995-04-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1995-07-01 END, required_guarantees=[] + +Query Stage #1 (4 -> 4): +ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) + ProjectionExec: expr=[l_orderkey@0 as l_orderkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: l_receiptdate@2 > l_commitdate@1 + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_orderkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 + +Query Stage #2 (4 -> 4): +ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "o_orderpriority", index: 0 }], 4)) + AggregateExec: mode=Partial, gby=[o_orderpriority@0 as o_orderpriority], aggr=[count(*)] + CoalesceBatchesExec: target_batch_size=8192 + 
HashJoinExec: mode=Partitioned, join_type=LeftSemi, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_orderpriority@1] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) + +Query Stage #3 (4 -> 4): +ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "o_orderpriority", index: 0 }], 4)) + SortExec: expr=[o_orderpriority@0 ASC NULLS LAST], preserve_partitioning=[true] + ProjectionExec: expr=[o_orderpriority@0 as o_orderpriority, count(*)@1 as order_count] + AggregateExec: mode=FinalPartitioned, gby=[o_orderpriority@0 as o_orderpriority], aggr=[count(*)] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "o_orderpriority", index: 0 }], 4)) + +Query Stage #4 (4 -> 1): +SortPreservingMergeExec: [o_orderpriority@0 ASC NULLS LAST] + ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "o_orderpriority", index: 0 }], 4)) diff --git a/testdata/expected-plans/q5.txt b/testdata/expected-plans/q5.txt index 2423a97..9773172 100644 --- a/testdata/expected-plans/q5.txt +++ b/testdata/expected-plans/q5.txt @@ -28,63 +28,150 @@ Sort: revenue DESC NULLS FIRST DataFusion Physical Plan ======================== -SortExec: expr=[revenue@1 DESC], preserve_partitioning=[false] - ProjectionExec: expr=[n_name@0 as n_name, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@1 as revenue] - AggregateExec: mode=Single, gby=[n_name@2 as n_name], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(r_regionkey@0, n_regionkey@3)], projection=[l_extendedprice@1, l_discount@2, n_name@3] - ProjectionExec: expr=[r_regionkey@0 as 
r_regionkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: r_name@1 = AFRICA - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = AFRICA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= AFRICA AND AFRICA <= r_name_max@1 END, required_guarantees=[r_name in (AFRICA)] - ProjectionExec: expr=[l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, n_name@0 as n_name, n_regionkey@1 as n_regionkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[n_name@1, n_regionkey@2, l_extendedprice@3, l_discount@4] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name, n_regionkey] - ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@0 as s_nationkey] +SortPreservingMergeExec: [revenue@1 DESC] + SortExec: expr=[revenue@1 DESC], preserve_partitioning=[true] + ProjectionExec: expr=[n_name@0 as n_name, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@1 as revenue] + AggregateExec: mode=FinalPartitioned, gby=[n_name@0 as n_name], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([n_name@0], 4), input_partitions=4 + AggregateExec: mode=Partial, gby=[n_name@2 as n_name], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(r_regionkey@0, n_regionkey@3)], projection=[l_extendedprice@1, l_discount@2, n_name@3] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1), 
(s_nationkey@1, c_nationkey@0)], projection=[s_nationkey@1, l_extendedprice@4, l_discount@5] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(o_orderkey@1, l_orderkey@0)], projection=[c_nationkey@0, l_suppkey@3, l_extendedprice@4, l_discount@5] + RepartitionExec: partitioning=Hash([r_regionkey@0], 4), input_partitions=4 + RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 + ProjectionExec: expr=[r_regionkey@0 as r_regionkey] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_nationkey@1, o_orderkey@2] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13212491]]}, projection=[c_custkey, c_nationkey] - ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: o_orderdate@2 >= 1994-01-01 AND o_orderdate@2 < 1995-01-01 - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1994-01-01 AND o_orderdate@4 < 1995-01-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1994-01-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1995-01-01 END, required_guarantees=[] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount] + FilterExec: r_name@1 = AFRICA + ParquetExec: file_groups={1 group: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet/part1.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = AFRICA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= AFRICA AND AFRICA <= r_name_max@1 END, required_guarantees=[r_name in (AFRICA)] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([n_regionkey@3], 4), input_partitions=4 + ProjectionExec: expr=[l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, n_name@0 as n_name, n_regionkey@1 as n_regionkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[n_name@1, n_regionkey@2, l_extendedprice@3, l_discount@4] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=1 + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name, n_regionkey] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([s_nationkey@2], 4), input_partitions=4 + ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@0 as s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1), (s_nationkey@1, c_nationkey@0)], projection=[s_nationkey@1, l_extendedprice@4, l_discount@5] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([s_suppkey@0, s_nationkey@1], 4), input_partitions=4 + RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, 
s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([l_suppkey@1, c_nationkey@0], 4), input_partitions=4 + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@1, l_orderkey@0)], projection=[c_nationkey@0, l_suppkey@3, l_extendedprice@4, l_discount@5] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([o_orderkey@1], 4), input_partitions=4 + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_nationkey@1, o_orderkey@2] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([c_custkey@0], 4), input_partitions=4 + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..3390424], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:3390424..6780848], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6780848..6782476, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..3388796], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:3388796..6779220]]}, projection=[c_custkey, c_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([o_custkey@1], 4), input_partitions=4 + ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: o_orderdate@2 >= 1994-01-01 AND o_orderdate@2 < 1995-01-01 + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13634779], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13634779..27269558], 
[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27269558..27275584, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13628753], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13628753..27263530]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1994-01-01 AND o_orderdate@4 < 1995-01-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1994-01-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1995-01-01 END, required_guarantees=[] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4 + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount] RaySQL Plan =========== -Query Stage #0 (1 -> 1): -SortExec: expr=[revenue@1 DESC], preserve_partitioning=[false] - ProjectionExec: expr=[n_name@0 as n_name, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@1 as revenue] - AggregateExec: mode=Single, gby=[n_name@2 as n_name], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] +Query Stage #0 (1 -> 4): +ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "r_regionkey", index: 0 }], 4)) + ProjectionExec: expr=[r_regionkey@0 as r_regionkey] + CoalesceBatchesExec: 
target_batch_size=8192 + FilterExec: r_name@1 = AFRICA + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet/part1.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = AFRICA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= AFRICA AND AFRICA <= r_name_max@1 END, required_guarantees=[r_name in (AFRICA)] + +Query Stage #1 (1 -> 4): +ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name, n_regionkey] + +Query Stage #2 (2 -> 4): +ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }, Column { name: "s_nationkey", index: 1 }], 4)) + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] + +Query Stage #3 (4 -> 4): +ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..3390424], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:3390424..6780848], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6780848..6782476, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..3388796], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:3388796..6779220]]}, projection=[c_custkey, c_nationkey] + +Query Stage #4 (4 -> 4): +ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "o_custkey", index: 1 }], 4)) + 
ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: o_orderdate@2 >= 1994-01-01 AND o_orderdate@2 < 1995-01-01 + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13634779], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13634779..27269558], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27269558..27275584, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13628753], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13628753..27263530]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1994-01-01 AND o_orderdate@4 < 1995-01-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1994-01-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1995-01-01 END, required_guarantees=[] + +Query Stage #5 (4 -> 4): +ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "o_orderkey", index: 1 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_nationkey@1, o_orderkey@2] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(r_regionkey@0, n_regionkey@3)], projection=[l_extendedprice@1, l_discount@2, n_name@3] - ProjectionExec: expr=[r_regionkey@0 as r_regionkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: r_name@1 = AFRICA - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = AFRICA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false 
ELSE r_name_min@0 <= AFRICA AND AFRICA <= r_name_max@1 END, required_guarantees=[r_name in (AFRICA)] - ProjectionExec: expr=[l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, n_name@0 as n_name, n_regionkey@1 as n_regionkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[n_name@1, n_regionkey@2, l_extendedprice@3, l_discount@4] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name, n_regionkey] - ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@0 as s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1), (s_nationkey@1, c_nationkey@0)], projection=[s_nationkey@1, l_extendedprice@4, l_discount@5] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(o_orderkey@1, l_orderkey@0)], projection=[c_nationkey@0, l_suppkey@3, l_extendedprice@4, l_discount@5] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_nationkey@1, o_orderkey@2] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13212491]]}, projection=[c_custkey, c_nationkey] - ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: o_orderdate@2 >= 1994-01-01 AND o_orderdate@2 < 1995-01-01 - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_custkey, o_orderdate], 
predicate=o_orderdate@4 >= 1994-01-01 AND o_orderdate@4 < 1995-01-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1994-01-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1995-01-01 END, required_guarantees=[] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount] + ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=4, input_partitioning=Hash([Column { name: "o_custkey", index: 1 }], 4)) + +Query Stage #6 (4 -> 4): +ShuffleWriterExec(stage_id=6, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount] + +Query Stage #7 (4 -> 4): +ShuffleWriterExec(stage_id=7, output_partitioning=Hash([Column { name: "l_suppkey", index: 1 }, Column { name: "c_nationkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@1, l_orderkey@0)], projection=[c_nationkey@0, l_suppkey@3, l_extendedprice@4, l_discount@5] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=5, 
input_partitioning=Hash([Column { name: "o_orderkey", index: 1 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=6, input_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) + +Query Stage #8 (4 -> 4): +ShuffleWriterExec(stage_id=8, output_partitioning=Hash([Column { name: "s_nationkey", index: 2 }], 4)) + ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@0 as s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1), (s_nationkey@1, c_nationkey@0)], projection=[s_nationkey@1, l_extendedprice@4, l_discount@5] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "s_suppkey", index: 0 }, Column { name: "s_nationkey", index: 1 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=7, input_partitioning=Hash([Column { name: "l_suppkey", index: 1 }, Column { name: "c_nationkey", index: 0 }], 4)) + +Query Stage #9 (4 -> 4): +ShuffleWriterExec(stage_id=9, output_partitioning=Hash([Column { name: "n_regionkey", index: 3 }], 4)) + ProjectionExec: expr=[l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, n_name@0 as n_name, n_regionkey@1 as n_regionkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[n_name@1, n_regionkey@2, l_extendedprice@3, l_discount@4] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=8, input_partitioning=Hash([Column { name: "s_nationkey", index: 2 }], 4)) + +Query Stage #10 (4 -> 4): +ShuffleWriterExec(stage_id=10, output_partitioning=Hash([Column { name: "n_name", index: 0 }], 4)) + AggregateExec: 
mode=Partial, gby=[n_name@2 as n_name], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(r_regionkey@0, n_regionkey@3)], projection=[l_extendedprice@1, l_discount@2, n_name@3] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "r_regionkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=9, input_partitioning=Hash([Column { name: "n_regionkey", index: 3 }], 4)) + +Query Stage #11 (4 -> 4): +ShuffleWriterExec(stage_id=11, output_partitioning=Hash([Column { name: "n_name", index: 0 }], 4)) + SortExec: expr=[revenue@1 DESC], preserve_partitioning=[true] + ProjectionExec: expr=[n_name@0 as n_name, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@1 as revenue] + AggregateExec: mode=FinalPartitioned, gby=[n_name@0 as n_name], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=10, input_partitioning=Hash([Column { name: "n_name", index: 0 }], 4)) + +Query Stage #12 (4 -> 1): +SortPreservingMergeExec: [revenue@1 DESC] + ShuffleReaderExec(stage_id=11, input_partitioning=Hash([Column { name: "n_name", index: 0 }], 4)) diff --git a/testdata/expected-plans/q6.txt b/testdata/expected-plans/q6.txt index 88c45cb..6f9143d 100644 --- a/testdata/expected-plans/q6.txt +++ b/testdata/expected-plans/q6.txt @@ -11,20 +11,28 @@ DataFusion Physical Plan ======================== ProjectionExec: expr=[sum(lineitem.l_extendedprice * lineitem.l_discount)@0 as revenue] - AggregateExec: mode=Single, gby=[], aggr=[sum(lineitem.l_extendedprice * lineitem.l_discount)] - ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_shipdate@3 >= 1994-01-01 AND l_shipdate@3 < 
1995-01-01 AND l_discount@2 >= Some(3),11,2 AND l_discount@2 <= Some(5),11,2 AND l_quantity@0 < Some(2400),11,2 - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_quantity, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1994-01-01 AND l_shipdate@10 < 1995-01-01 AND l_discount@6 >= Some(3),11,2 AND l_discount@6 <= Some(5),11,2 AND l_quantity@4 < Some(2400),11,2, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1994-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1995-01-01 END AND CASE WHEN l_discount_null_count@5 = l_discount_row_count@6 THEN false ELSE l_discount_max@4 >= Some(3),11,2 END AND CASE WHEN l_discount_null_count@5 = l_discount_row_count@6 THEN false ELSE l_discount_min@7 <= Some(5),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@8 < Some(2400),11,2 END, required_guarantees=[] + AggregateExec: mode=Final, gby=[], aggr=[sum(lineitem.l_extendedprice * lineitem.l_discount)] + CoalescePartitionsExec + AggregateExec: mode=Partial, gby=[], aggr=[sum(lineitem.l_extendedprice * lineitem.l_discount)] + ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: l_shipdate@3 >= 1994-01-01 AND l_shipdate@3 < 1995-01-01 AND l_discount@2 >= Some(3),11,2 AND l_discount@2 <= Some(5),11,2 AND l_quantity@0 < Some(2400),11,2 + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], 
[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_quantity, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1994-01-01 AND l_shipdate@10 < 1995-01-01 AND l_discount@6 >= Some(3),11,2 AND l_discount@6 <= Some(5),11,2 AND l_quantity@4 < Some(2400),11,2, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1994-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1995-01-01 END AND CASE WHEN l_discount_null_count@5 = l_discount_row_count@6 THEN false ELSE l_discount_max@4 >= Some(3),11,2 END AND CASE WHEN l_discount_null_count@5 = l_discount_row_count@6 THEN false ELSE l_discount_min@7 <= Some(5),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@8 < Some(2400),11,2 END, required_guarantees=[] RaySQL Plan =========== -Query Stage #0 (1 -> 1): -ProjectionExec: expr=[sum(lineitem.l_extendedprice * lineitem.l_discount)@0 as revenue] - AggregateExec: mode=Single, gby=[], aggr=[sum(lineitem.l_extendedprice * lineitem.l_discount)] +Query Stage #0 (4 -> 1): +ShuffleWriterExec(stage_id=0, output_partitioning=UnknownPartitioning(4)) + AggregateExec: mode=Partial, gby=[], aggr=[sum(lineitem.l_extendedprice * lineitem.l_discount)] ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] CoalesceBatchesExec: target_batch_size=8192 FilterExec: l_shipdate@3 >= 1994-01-01 AND l_shipdate@3 < 1995-01-01 AND l_discount@2 >= Some(3),11,2 AND l_discount@2 <= Some(5),11,2 AND l_quantity@0 < Some(2400),11,2 - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_quantity, l_extendedprice, l_discount, l_shipdate], 
predicate=l_shipdate@10 >= 1994-01-01 AND l_shipdate@10 < 1995-01-01 AND l_discount@6 >= Some(3),11,2 AND l_discount@6 <= Some(5),11,2 AND l_quantity@4 < Some(2400),11,2, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1994-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1995-01-01 END AND CASE WHEN l_discount_null_count@5 = l_discount_row_count@6 THEN false ELSE l_discount_max@4 >= Some(3),11,2 END AND CASE WHEN l_discount_null_count@5 = l_discount_row_count@6 THEN false ELSE l_discount_min@7 <= Some(5),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@8 < Some(2400),11,2 END, required_guarantees=[] + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_quantity, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1994-01-01 AND l_shipdate@10 < 1995-01-01 AND l_discount@6 >= Some(3),11,2 AND l_discount@6 <= Some(5),11,2 AND l_quantity@4 < Some(2400),11,2, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1994-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1995-01-01 END AND CASE WHEN l_discount_null_count@5 = l_discount_row_count@6 THEN false ELSE l_discount_max@4 >= Some(3),11,2 END AND CASE WHEN l_discount_null_count@5 = l_discount_row_count@6 THEN 
false ELSE l_discount_min@7 <= Some(5),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@8 < Some(2400),11,2 END, required_guarantees=[] + +Query Stage #1 (1 -> 1): +ProjectionExec: expr=[sum(lineitem.l_extendedprice * lineitem.l_discount)@0 as revenue] + AggregateExec: mode=Final, gby=[], aggr=[sum(lineitem.l_extendedprice * lineitem.l_discount)] + CoalescePartitionsExec + ShuffleReaderExec(stage_id=0, input_partitioning=UnknownPartitioning(4)) diff --git a/testdata/expected-plans/q7.txt b/testdata/expected-plans/q7.txt index 190dad4..5253594 100644 --- a/testdata/expected-plans/q7.txt +++ b/testdata/expected-plans/q7.txt @@ -30,65 +30,153 @@ Sort: shipping.supp_nation ASC NULLS LAST, shipping.cust_nation ASC NULLS LAST, DataFusion Physical Plan ======================== -SortExec: expr=[supp_nation@0 ASC NULLS LAST,cust_nation@1 ASC NULLS LAST,l_year@2 ASC NULLS LAST], preserve_partitioning=[false] - ProjectionExec: expr=[supp_nation@0 as supp_nation, cust_nation@1 as cust_nation, l_year@2 as l_year, sum(shipping.volume)@3 as revenue] - AggregateExec: mode=Single, gby=[supp_nation@0 as supp_nation, cust_nation@1 as cust_nation, l_year@2 as l_year], aggr=[sum(shipping.volume)] - ProjectionExec: expr=[n_name@4 as supp_nation, n_name@0 as cust_nation, date_part(YEAR, l_shipdate@3) as l_year, l_extendedprice@1 * (Some(1),20,0 - l_discount@2) as volume] +SortPreservingMergeExec: [supp_nation@0 ASC NULLS LAST,cust_nation@1 ASC NULLS LAST,l_year@2 ASC NULLS LAST] + SortExec: expr=[supp_nation@0 ASC NULLS LAST,cust_nation@1 ASC NULLS LAST,l_year@2 ASC NULLS LAST], preserve_partitioning=[true] + ProjectionExec: expr=[supp_nation@0 as supp_nation, cust_nation@1 as cust_nation, l_year@2 as l_year, sum(shipping.volume)@3 as revenue] + AggregateExec: mode=FinalPartitioned, gby=[supp_nation@0 as supp_nation, cust_nation@1 as cust_nation, l_year@2 as l_year], aggr=[sum(shipping.volume)] CoalesceBatchesExec: 
target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, c_nationkey@3)], filter=n_name@0 = GERMANY AND n_name@1 = IRAQ OR n_name@0 = IRAQ AND n_name@1 = GERMANY, projection=[n_name@1, l_extendedprice@2, l_discount@3, l_shipdate@4, n_name@6] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: n_name@1 = IRAQ OR n_name@1 = GERMANY - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = IRAQ OR n_name@1 = GERMANY, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= IRAQ AND IRAQ <= n_name_max@1 END OR CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= GERMANY AND GERMANY <= n_name_max@1 END, required_guarantees=[n_name in (GERMANY, IRAQ)] - ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, l_shipdate@3 as l_shipdate, c_nationkey@4 as c_nationkey, n_name@0 as n_name] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@0)], projection=[n_name@1, l_extendedprice@3, l_discount@4, l_shipdate@5, c_nationkey@6] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: n_name@1 = GERMANY OR n_name@1 = IRAQ - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = GERMANY OR n_name@1 = IRAQ, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= GERMANY AND GERMANY <= n_name_max@1 END OR CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= IRAQ AND IRAQ <= n_name_max@1 END, required_guarantees=[n_name in (GERMANY, IRAQ)] - ProjectionExec: expr=[s_nationkey@1 as s_nationkey, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, l_shipdate@4 
as l_shipdate, c_nationkey@0 as c_nationkey] + RepartitionExec: partitioning=Hash([supp_nation@0, cust_nation@1, l_year@2], 4), input_partitions=4 + AggregateExec: mode=Partial, gby=[supp_nation@0 as supp_nation, cust_nation@1 as cust_nation, l_year@2 as l_year], aggr=[sum(shipping.volume)] + ProjectionExec: expr=[n_name@4 as supp_nation, n_name@0 as cust_nation, date_part(YEAR, l_shipdate@3) as l_year, l_extendedprice@1 * (Some(1),20,0 - l_discount@2) as volume] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, c_nationkey@3)], filter=n_name@0 = GERMANY AND n_name@1 = IRAQ OR n_name@0 = IRAQ AND n_name@1 = GERMANY, projection=[n_name@1, l_extendedprice@2, l_discount@3, l_shipdate@4, n_name@6] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(c_custkey@0, o_custkey@4)], projection=[c_nationkey@1, s_nationkey@2, l_extendedprice@3, l_discount@4, l_shipdate@5] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13212491]]}, projection=[c_custkey, c_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(l_orderkey@1, o_orderkey@0)], projection=[s_nationkey@0, l_extendedprice@2, l_discount@3, l_shipdate@4, o_custkey@6] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1)], projection=[s_nationkey@1, l_orderkey@2, l_extendedprice@4, l_discount@5, l_shipdate@6] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_shipdate@4 >= 1995-01-01 AND l_shipdate@4 <= 1996-12-31 - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, 
l_suppkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1995-01-01 AND l_shipdate@10 <= 1996-12-31, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1995-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 <= 1996-12-31 END, required_guarantees=[] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_custkey] + RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=4 + RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: n_name@1 = IRAQ OR n_name@1 = GERMANY + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = IRAQ OR n_name@1 = GERMANY, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= IRAQ AND IRAQ <= n_name_max@1 END OR CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= GERMANY AND GERMANY <= n_name_max@1 END, required_guarantees=[n_name in (IRAQ, GERMANY)] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([c_nationkey@3], 4), input_partitions=4 + ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, l_shipdate@3 as l_shipdate, c_nationkey@4 as c_nationkey, n_name@0 as n_name] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@0)], projection=[n_name@1, l_extendedprice@3, l_discount@4, l_shipdate@5, c_nationkey@6] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=4 + RepartitionExec: partitioning=RoundRobinBatch(4), 
input_partitions=1 + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: n_name@1 = GERMANY OR n_name@1 = IRAQ + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = GERMANY OR n_name@1 = IRAQ, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= GERMANY AND GERMANY <= n_name_max@1 END OR CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= IRAQ AND IRAQ <= n_name_max@1 END, required_guarantees=[n_name in (GERMANY, IRAQ)] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([s_nationkey@0], 4), input_partitions=4 + ProjectionExec: expr=[s_nationkey@1 as s_nationkey, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, l_shipdate@4 as l_shipdate, c_nationkey@0 as c_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@4)], projection=[c_nationkey@1, s_nationkey@2, l_extendedprice@3, l_discount@4, l_shipdate@5] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([c_custkey@0], 4), input_partitions=4 + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..3390424], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:3390424..6780848], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6780848..6782476, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..3388796], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:3388796..6779220]]}, projection=[c_custkey, c_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([o_custkey@4], 4), input_partitions=4 + CoalesceBatchesExec: 
target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(l_orderkey@1, o_orderkey@0)], projection=[s_nationkey@0, l_extendedprice@2, l_discount@3, l_shipdate@4, o_custkey@6] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([l_orderkey@1], 4), input_partitions=4 + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1)], projection=[s_nationkey@1, l_orderkey@2, l_extendedprice@4, l_discount@5, l_shipdate@6] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=4 + RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([l_suppkey@1], 4), input_partitions=4 + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: l_shipdate@4 >= 1995-01-01 AND l_shipdate@4 <= 1996-12-31 + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1995-01-01 AND l_shipdate@10 <= 1996-12-31, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false 
ELSE l_shipdate_max@0 >= 1995-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 <= 1996-12-31 END, required_guarantees=[] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([o_orderkey@0], 4), input_partitions=4 + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13634779], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13634779..27269558], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27269558..27275584, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13628753], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13628753..27263530]]}, projection=[o_orderkey, o_custkey] RaySQL Plan =========== -Query Stage #0 (1 -> 1): -SortExec: expr=[supp_nation@0 ASC NULLS LAST,cust_nation@1 ASC NULLS LAST,l_year@2 ASC NULLS LAST], preserve_partitioning=[false] - ProjectionExec: expr=[supp_nation@0 as supp_nation, cust_nation@1 as cust_nation, l_year@2 as l_year, sum(shipping.volume)@3 as revenue] - AggregateExec: mode=Single, gby=[supp_nation@0 as supp_nation, cust_nation@1 as cust_nation, l_year@2 as l_year], aggr=[sum(shipping.volume)] - ProjectionExec: expr=[n_name@4 as supp_nation, n_name@0 as cust_nation, date_part(YEAR, l_shipdate@3) as l_year, l_extendedprice@1 * (Some(1),20,0 - l_discount@2) as volume] +Query Stage #0 (1 -> 4): +ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: n_name@1 = IRAQ OR n_name@1 = GERMANY + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = IRAQ OR n_name@1 = GERMANY, pruning_predicate=CASE WHEN n_name_null_count@2 = 
n_name_row_count@3 THEN false ELSE n_name_min@0 <= IRAQ AND IRAQ <= n_name_max@1 END OR CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= GERMANY AND GERMANY <= n_name_max@1 END, required_guarantees=[n_name in (IRAQ, GERMANY)] + +Query Stage #1 (1 -> 4): +ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: n_name@1 = GERMANY OR n_name@1 = IRAQ + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = GERMANY OR n_name@1 = IRAQ, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= GERMANY AND GERMANY <= n_name_max@1 END OR CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= IRAQ AND IRAQ <= n_name_max@1 END, required_guarantees=[n_name in (GERMANY, IRAQ)] + +Query Stage #2 (4 -> 4): +ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..3390424], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:3390424..6780848], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6780848..6782476, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..3388796], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:3388796..6779220]]}, projection=[c_custkey, c_nationkey] + +Query Stage #3 (2 -> 4): +ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], 
[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] + +Query Stage #4 (4 -> 4): +ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "l_suppkey", index: 1 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: l_shipdate@4 >= 1995-01-01 AND l_shipdate@4 <= 1996-12-31 + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1995-01-01 AND l_shipdate@10 <= 1996-12-31, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1995-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 <= 1996-12-31 END, required_guarantees=[] + +Query Stage #5 (4 -> 4): +ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "l_orderkey", index: 1 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1)], projection=[s_nationkey@1, l_orderkey@2, l_extendedprice@4, l_discount@5, l_shipdate@6] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=4, input_partitioning=Hash([Column { name: "l_suppkey", index: 1 }], 4)) + +Query Stage #6 (4 
-> 4): +ShuffleWriterExec(stage_id=6, output_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13634779], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13634779..27269558], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27269558..27275584, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13628753], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13628753..27263530]]}, projection=[o_orderkey, o_custkey] + +Query Stage #7 (4 -> 4): +ShuffleWriterExec(stage_id=7, output_partitioning=Hash([Column { name: "o_custkey", index: 4 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(l_orderkey@1, o_orderkey@0)], projection=[s_nationkey@0, l_extendedprice@2, l_discount@3, l_shipdate@4, o_custkey@6] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=5, input_partitioning=Hash([Column { name: "l_orderkey", index: 1 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=6, input_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) + +Query Stage #8 (4 -> 4): +ShuffleWriterExec(stage_id=8, output_partitioning=Hash([Column { name: "s_nationkey", index: 0 }], 4)) + ProjectionExec: expr=[s_nationkey@1 as s_nationkey, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, l_shipdate@4 as l_shipdate, c_nationkey@0 as c_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@4)], projection=[c_nationkey@1, s_nationkey@2, l_extendedprice@3, l_discount@4, l_shipdate@5] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, c_nationkey@3)], 
filter=n_name@0 = GERMANY AND n_name@1 = IRAQ OR n_name@0 = IRAQ AND n_name@1 = GERMANY, projection=[n_name@1, l_extendedprice@2, l_discount@3, l_shipdate@4, n_name@6] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: n_name@1 = IRAQ OR n_name@1 = GERMANY - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = IRAQ OR n_name@1 = GERMANY, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= IRAQ AND IRAQ <= n_name_max@1 END OR CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= GERMANY AND GERMANY <= n_name_max@1 END, required_guarantees=[n_name in (GERMANY, IRAQ)] - ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, l_shipdate@3 as l_shipdate, c_nationkey@4 as c_nationkey, n_name@0 as n_name] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@0)], projection=[n_name@1, l_extendedprice@3, l_discount@4, l_shipdate@5, c_nationkey@6] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: n_name@1 = GERMANY OR n_name@1 = IRAQ - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = GERMANY OR n_name@1 = IRAQ, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= GERMANY AND GERMANY <= n_name_max@1 END OR CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= IRAQ AND IRAQ <= n_name_max@1 END, required_guarantees=[n_name in (GERMANY, IRAQ)] - ProjectionExec: expr=[s_nationkey@1 as s_nationkey, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, l_shipdate@4 as l_shipdate, c_nationkey@0 as c_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: 
mode=CollectLeft, join_type=Inner, on=[(c_custkey@0, o_custkey@4)], projection=[c_nationkey@1, s_nationkey@2, l_extendedprice@3, l_discount@4, l_shipdate@5] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13212491]]}, projection=[c_custkey, c_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(l_orderkey@1, o_orderkey@0)], projection=[s_nationkey@0, l_extendedprice@2, l_discount@3, l_shipdate@4, o_custkey@6] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1)], projection=[s_nationkey@1, l_orderkey@2, l_extendedprice@4, l_discount@5, l_shipdate@6] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_shipdate@4 >= 1995-01-01 AND l_shipdate@4 <= 1996-12-31 - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1995-01-01 AND l_shipdate@10 <= 1996-12-31, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1995-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 <= 1996-12-31 END, required_guarantees=[] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_custkey] + ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=7, input_partitioning=Hash([Column { name: "o_custkey", index: 4 }], 4)) + +Query Stage #9 (4 -> 4): 
+ShuffleWriterExec(stage_id=9, output_partitioning=Hash([Column { name: "c_nationkey", index: 3 }], 4)) + ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, l_shipdate@3 as l_shipdate, c_nationkey@4 as c_nationkey, n_name@0 as n_name] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@0)], projection=[n_name@1, l_extendedprice@3, l_discount@4, l_shipdate@5, c_nationkey@6] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=8, input_partitioning=Hash([Column { name: "s_nationkey", index: 0 }], 4)) + +Query Stage #10 (4 -> 4): +ShuffleWriterExec(stage_id=10, output_partitioning=Hash([Column { name: "supp_nation", index: 0 }, Column { name: "cust_nation", index: 1 }, Column { name: "l_year", index: 2 }], 4)) + AggregateExec: mode=Partial, gby=[supp_nation@0 as supp_nation, cust_nation@1 as cust_nation, l_year@2 as l_year], aggr=[sum(shipping.volume)] + ProjectionExec: expr=[n_name@4 as supp_nation, n_name@0 as cust_nation, date_part(YEAR, l_shipdate@3) as l_year, l_extendedprice@1 * (Some(1),20,0 - l_discount@2) as volume] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, c_nationkey@3)], filter=n_name@0 = GERMANY AND n_name@1 = IRAQ OR n_name@0 = IRAQ AND n_name@1 = GERMANY, projection=[n_name@1, l_extendedprice@2, l_discount@3, l_shipdate@4, n_name@6] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=9, input_partitioning=Hash([Column { name: "c_nationkey", index: 3 }], 4)) + +Query Stage #11 (4 -> 4): +ShuffleWriterExec(stage_id=11, 
output_partitioning=Hash([Column { name: "supp_nation", index: 0 }, Column { name: "cust_nation", index: 1 }, Column { name: "l_year", index: 2 }], 4)) + SortExec: expr=[supp_nation@0 ASC NULLS LAST,cust_nation@1 ASC NULLS LAST,l_year@2 ASC NULLS LAST], preserve_partitioning=[true] + ProjectionExec: expr=[supp_nation@0 as supp_nation, cust_nation@1 as cust_nation, l_year@2 as l_year, sum(shipping.volume)@3 as revenue] + AggregateExec: mode=FinalPartitioned, gby=[supp_nation@0 as supp_nation, cust_nation@1 as cust_nation, l_year@2 as l_year], aggr=[sum(shipping.volume)] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=10, input_partitioning=Hash([Column { name: "supp_nation", index: 0 }, Column { name: "cust_nation", index: 1 }, Column { name: "l_year", index: 2 }], 4)) + +Query Stage #12 (4 -> 1): +SortPreservingMergeExec: [supp_nation@0 ASC NULLS LAST,cust_nation@1 ASC NULLS LAST,l_year@2 ASC NULLS LAST] + ShuffleReaderExec(stage_id=11, input_partitioning=Hash([Column { name: "supp_nation", index: 0 }, Column { name: "cust_nation", index: 1 }, Column { name: "l_year", index: 2 }], 4)) diff --git a/testdata/expected-plans/q8.txt b/testdata/expected-plans/q8.txt index ddd66ff..c346f6c 100644 --- a/testdata/expected-plans/q8.txt +++ b/testdata/expected-plans/q8.txt @@ -38,87 +38,203 @@ Sort: all_nations.o_year ASC NULLS LAST DataFusion Physical Plan ======================== -SortExec: expr=[o_year@0 ASC NULLS LAST], preserve_partitioning=[false] - ProjectionExec: expr=[o_year@0 as o_year, sum(CASE WHEN all_nations.nation = Utf8("IRAQ") THEN all_nations.volume ELSE Int64(0) END)@1 / sum(all_nations.volume)@2 as mkt_share] - AggregateExec: mode=Single, gby=[o_year@0 as o_year], aggr=[sum(CASE WHEN all_nations.nation = Utf8("IRAQ") THEN all_nations.volume ELSE Int64(0) END), sum(all_nations.volume)] - ProjectionExec: expr=[date_part(YEAR, o_orderdate@2) as o_year, l_extendedprice@0 * (Some(1),20,0 - l_discount@1) as volume, n_name@3 as nation] 
+SortPreservingMergeExec: [o_year@0 ASC NULLS LAST] + SortExec: expr=[o_year@0 ASC NULLS LAST], preserve_partitioning=[true] + ProjectionExec: expr=[o_year@0 as o_year, sum(CASE WHEN all_nations.nation = Utf8("IRAQ") THEN all_nations.volume ELSE Int64(0) END)@1 / sum(all_nations.volume)@2 as mkt_share] + AggregateExec: mode=FinalPartitioned, gby=[o_year@0 as o_year], aggr=[sum(CASE WHEN all_nations.nation = Utf8("IRAQ") THEN all_nations.volume ELSE Int64(0) END), sum(all_nations.volume)] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(r_regionkey@0, n_regionkey@3)], projection=[l_extendedprice@1, l_discount@2, o_orderdate@3, n_name@5] - ProjectionExec: expr=[r_regionkey@0 as r_regionkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: r_name@1 = MIDDLE EAST - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = MIDDLE EAST, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= MIDDLE EAST AND MIDDLE EAST <= r_name_max@1 END, required_guarantees=[r_name in (MIDDLE EAST)] - ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, o_orderdate@3 as o_orderdate, n_regionkey@4 as n_regionkey, n_name@0 as n_name] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[n_name@1, l_extendedprice@2, l_discount@3, o_orderdate@5, n_regionkey@6] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name] - ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@3 as s_nationkey, o_orderdate@4 as o_orderdate, n_regionkey@0 as n_regionkey] + RepartitionExec: partitioning=Hash([o_year@0], 4), input_partitions=4 + 
AggregateExec: mode=Partial, gby=[o_year@0 as o_year], aggr=[sum(CASE WHEN all_nations.nation = Utf8("IRAQ") THEN all_nations.volume ELSE Int64(0) END), sum(all_nations.volume)] + ProjectionExec: expr=[date_part(YEAR, o_orderdate@2) as o_year, l_extendedprice@0 * (Some(1),20,0 - l_discount@1) as volume, n_name@3 as nation] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(r_regionkey@0, n_regionkey@3)], projection=[l_extendedprice@1, l_discount@2, o_orderdate@3, n_name@5] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, c_nationkey@4)], projection=[n_regionkey@1, l_extendedprice@2, l_discount@3, s_nationkey@4, o_orderdate@5] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_regionkey] - ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@3 as s_nationkey, o_orderdate@4 as o_orderdate, c_nationkey@0 as c_nationkey] + RepartitionExec: partitioning=Hash([r_regionkey@0], 4), input_partitions=4 + RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 + ProjectionExec: expr=[r_regionkey@0 as r_regionkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: r_name@1 = MIDDLE EAST + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet/part1.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = MIDDLE EAST, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= MIDDLE EAST AND MIDDLE EAST <= r_name_max@1 END, required_guarantees=[r_name in (MIDDLE EAST)] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([n_regionkey@3], 4), input_partitions=4 + ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, o_orderdate@3 as o_orderdate, 
n_regionkey@4 as n_regionkey, n_name@0 as n_name] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(c_custkey@0, o_custkey@3)], projection=[c_nationkey@1, l_extendedprice@2, l_discount@3, s_nationkey@4, o_orderdate@6] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13212491]]}, projection=[c_custkey, c_nationkey] - ProjectionExec: expr=[l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@4 as s_nationkey, o_custkey@0 as o_custkey, o_orderdate@1 as o_orderdate] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_custkey@1, o_orderdate@2, l_extendedprice@4, l_discount@5, s_nationkey@6] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[n_name@1, l_extendedprice@2, l_discount@3, o_orderdate@5, n_regionkey@6] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=1 + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([s_nationkey@2], 4), input_partitions=4 + ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@3 as s_nationkey, o_orderdate@4 as o_orderdate, n_regionkey@0 as n_regionkey] CoalesceBatchesExec: target_batch_size=8192 - FilterExec: o_orderdate@2 >= 1995-01-01 AND o_orderdate@2 <= 1996-12-31 - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1995-01-01 AND o_orderdate@4 <= 1996-12-31, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = 
o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1995-01-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 <= 1996-12-31 END, required_guarantees=[] - ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@0 as s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1)], projection=[s_nationkey@1, l_orderkey@2, l_extendedprice@4, l_discount@5] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, l_partkey@1)], projection=[l_orderkey@1, l_suppkey@3, l_extendedprice@4, l_discount@5] - ProjectionExec: expr=[p_partkey@0 as p_partkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: p_type@1 = LARGE PLATED STEEL - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_type], predicate=p_type@4 = LARGE PLATED STEEL, pruning_predicate=CASE WHEN p_type_null_count@2 = p_type_row_count@3 THEN false ELSE p_type_min@0 <= LARGE PLATED STEEL AND LARGE PLATED STEEL <= p_type_max@1 END, required_guarantees=[p_type in (LARGE PLATED STEEL)] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_partkey, l_suppkey, l_extendedprice, l_discount] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, c_nationkey@4)], projection=[n_regionkey@1, l_extendedprice@2, l_discount@3, s_nationkey@4, o_orderdate@5] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=1 + ParquetExec: file_groups={1 group: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_regionkey] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([c_nationkey@4], 4), input_partitions=4 + ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@3 as s_nationkey, o_orderdate@4 as o_orderdate, c_nationkey@0 as c_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@3)], projection=[c_nationkey@1, l_extendedprice@2, l_discount@3, s_nationkey@4, o_orderdate@6] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([c_custkey@0], 4), input_partitions=4 + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..3390424], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:3390424..6780848], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6780848..6782476, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..3388796], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:3388796..6779220]]}, projection=[c_custkey, c_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([o_custkey@3], 4), input_partitions=4 + ProjectionExec: expr=[l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@4 as s_nationkey, o_custkey@0 as o_custkey, o_orderdate@1 as o_orderdate] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_custkey@1, o_orderdate@2, l_extendedprice@4, l_discount@5, s_nationkey@6] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([o_orderkey@0], 4), input_partitions=4 + 
CoalesceBatchesExec: target_batch_size=8192 + FilterExec: o_orderdate@2 >= 1995-01-01 AND o_orderdate@2 <= 1996-12-31 + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13634779], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13634779..27269558], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27269558..27275584, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13628753], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13628753..27263530]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1995-01-01 AND o_orderdate@4 <= 1996-12-31, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1995-01-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 <= 1996-12-31 END, required_guarantees=[] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4 + ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@0 as s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1)], projection=[s_nationkey@1, l_orderkey@2, l_extendedprice@4, l_discount@5] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=4 + RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] + CoalesceBatchesExec: 
target_batch_size=8192 + RepartitionExec: partitioning=Hash([l_suppkey@1], 4), input_partitions=4 + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, l_partkey@1)], projection=[l_orderkey@1, l_suppkey@3, l_extendedprice@4, l_discount@5] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([p_partkey@0], 4), input_partitions=4 + ProjectionExec: expr=[p_partkey@0 as p_partkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: p_type@1 = LARGE PLATED STEEL + RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_type], predicate=p_type@4 = LARGE PLATED STEEL, pruning_predicate=CASE WHEN p_type_null_count@2 = p_type_row_count@3 THEN false ELSE p_type_min@0 <= LARGE PLATED STEEL AND LARGE PLATED STEEL <= p_type_max@1 END, required_guarantees=[p_type in (LARGE PLATED STEEL)] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([l_partkey@1], 4), input_partitions=4 + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_orderkey, l_partkey, l_suppkey, l_extendedprice, l_discount] RaySQL Plan =========== -Query Stage #0 (1 -> 1): -SortExec: expr=[o_year@0 ASC NULLS LAST], preserve_partitioning=[false] - 
ProjectionExec: expr=[o_year@0 as o_year, sum(CASE WHEN all_nations.nation = Utf8("IRAQ") THEN all_nations.volume ELSE Int64(0) END)@1 / sum(all_nations.volume)@2 as mkt_share] - AggregateExec: mode=Single, gby=[o_year@0 as o_year], aggr=[sum(CASE WHEN all_nations.nation = Utf8("IRAQ") THEN all_nations.volume ELSE Int64(0) END), sum(all_nations.volume)] - ProjectionExec: expr=[date_part(YEAR, o_orderdate@2) as o_year, l_extendedprice@0 * (Some(1),20,0 - l_discount@1) as volume, n_name@3 as nation] +Query Stage #0 (1 -> 4): +ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "r_regionkey", index: 0 }], 4)) + ProjectionExec: expr=[r_regionkey@0 as r_regionkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: r_name@1 = MIDDLE EAST + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet/part1.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = MIDDLE EAST, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= MIDDLE EAST AND MIDDLE EAST <= r_name_max@1 END, required_guarantees=[r_name in (MIDDLE EAST)] + +Query Stage #1 (1 -> 4): +ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name] + +Query Stage #2 (1 -> 4): +ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_regionkey] + +Query Stage #3 (4 -> 4): +ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) + ParquetExec: file_groups={4 groups: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..3390424], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:3390424..6780848], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6780848..6782476, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..3388796], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:3388796..6779220]]}, projection=[c_custkey, c_nationkey] + +Query Stage #4 (4 -> 4): +ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: o_orderdate@2 >= 1995-01-01 AND o_orderdate@2 <= 1996-12-31 + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13634779], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13634779..27269558], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27269558..27275584, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13628753], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13628753..27263530]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1995-01-01 AND o_orderdate@4 <= 1996-12-31, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1995-01-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 <= 1996-12-31 END, required_guarantees=[] + +Query Stage #5 (2 -> 4): +ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], 
[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] + +Query Stage #6 (2 -> 4): +ShuffleWriterExec(stage_id=6, output_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) + ProjectionExec: expr=[p_partkey@0 as p_partkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: p_type@1 = LARGE PLATED STEEL + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_type], predicate=p_type@4 = LARGE PLATED STEEL, pruning_predicate=CASE WHEN p_type_null_count@2 = p_type_row_count@3 THEN false ELSE p_type_min@0 <= LARGE PLATED STEEL AND LARGE PLATED STEEL <= p_type_max@1 END, required_guarantees=[p_type in (LARGE PLATED STEEL)] + +Query Stage #7 (4 -> 4): +ShuffleWriterExec(stage_id=7, output_partitioning=Hash([Column { name: "l_partkey", index: 1 }], 4)) + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_orderkey, l_partkey, l_suppkey, l_extendedprice, l_discount] + +Query Stage #8 (4 -> 4): +ShuffleWriterExec(stage_id=8, output_partitioning=Hash([Column { name: "l_suppkey", index: 1 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, l_partkey@1)], projection=[l_orderkey@1, l_suppkey@3, l_extendedprice@4, l_discount@5] + CoalesceBatchesExec: 
target_batch_size=8192 + ShuffleReaderExec(stage_id=6, input_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=7, input_partitioning=Hash([Column { name: "l_partkey", index: 1 }], 4)) + +Query Stage #9 (4 -> 4): +ShuffleWriterExec(stage_id=9, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) + ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@0 as s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1)], projection=[s_nationkey@1, l_orderkey@2, l_extendedprice@4, l_discount@5] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(r_regionkey@0, n_regionkey@3)], projection=[l_extendedprice@1, l_discount@2, o_orderdate@3, n_name@5] - ProjectionExec: expr=[r_regionkey@0 as r_regionkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: r_name@1 = MIDDLE EAST - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = MIDDLE EAST, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= MIDDLE EAST AND MIDDLE EAST <= r_name_max@1 END, required_guarantees=[r_name in (MIDDLE EAST)] - ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, o_orderdate@3 as o_orderdate, n_regionkey@4 as n_regionkey, n_name@0 as n_name] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[n_name@1, l_extendedprice@2, l_discount@3, o_orderdate@5, n_regionkey@6] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, 
n_name] - ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@3 as s_nationkey, o_orderdate@4 as o_orderdate, n_regionkey@0 as n_regionkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, c_nationkey@4)], projection=[n_regionkey@1, l_extendedprice@2, l_discount@3, s_nationkey@4, o_orderdate@5] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_regionkey] - ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@3 as s_nationkey, o_orderdate@4 as o_orderdate, c_nationkey@0 as c_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(c_custkey@0, o_custkey@3)], projection=[c_nationkey@1, l_extendedprice@2, l_discount@3, s_nationkey@4, o_orderdate@6] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13212491]]}, projection=[c_custkey, c_nationkey] - ProjectionExec: expr=[l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@4 as s_nationkey, o_custkey@0 as o_custkey, o_orderdate@1 as o_orderdate] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_custkey@1, o_orderdate@2, l_extendedprice@4, l_discount@5, s_nationkey@6] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: o_orderdate@2 >= 1995-01-01 AND o_orderdate@2 <= 1996-12-31 - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1995-01-01 AND o_orderdate@4 <= 1996-12-31, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 
1995-01-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 <= 1996-12-31 END, required_guarantees=[] - ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@0 as s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1)], projection=[s_nationkey@1, l_orderkey@2, l_extendedprice@4, l_discount@5] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, l_partkey@1)], projection=[l_orderkey@1, l_suppkey@3, l_extendedprice@4, l_discount@5] - ProjectionExec: expr=[p_partkey@0 as p_partkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: p_type@1 = LARGE PLATED STEEL - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_type], predicate=p_type@4 = LARGE PLATED STEEL, pruning_predicate=CASE WHEN p_type_null_count@2 = p_type_row_count@3 THEN false ELSE p_type_min@0 <= LARGE PLATED STEEL AND LARGE PLATED STEEL <= p_type_max@1 END, required_guarantees=[p_type in (LARGE PLATED STEEL)] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_partkey, l_suppkey, l_extendedprice, l_discount] + ShuffleReaderExec(stage_id=5, input_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=8, input_partitioning=Hash([Column { name: "l_suppkey", index: 1 }], 4)) + +Query Stage #10 (4 -> 4): +ShuffleWriterExec(stage_id=10, output_partitioning=Hash([Column { name: "o_custkey", index: 3 }], 4)) + ProjectionExec: 
expr=[l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@4 as s_nationkey, o_custkey@0 as o_custkey, o_orderdate@1 as o_orderdate] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_custkey@1, o_orderdate@2, l_extendedprice@4, l_discount@5, s_nationkey@6] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=4, input_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=9, input_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) + +Query Stage #11 (4 -> 4): +ShuffleWriterExec(stage_id=11, output_partitioning=Hash([Column { name: "c_nationkey", index: 4 }], 4)) + ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@3 as s_nationkey, o_orderdate@4 as o_orderdate, c_nationkey@0 as c_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@3)], projection=[c_nationkey@1, l_extendedprice@2, l_discount@3, s_nationkey@4, o_orderdate@6] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=10, input_partitioning=Hash([Column { name: "o_custkey", index: 3 }], 4)) + +Query Stage #12 (4 -> 4): +ShuffleWriterExec(stage_id=12, output_partitioning=Hash([Column { name: "s_nationkey", index: 2 }], 4)) + ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@3 as s_nationkey, o_orderdate@4 as o_orderdate, n_regionkey@0 as n_regionkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, c_nationkey@4)], projection=[n_regionkey@1, 
l_extendedprice@2, l_discount@3, s_nationkey@4, o_orderdate@5] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=11, input_partitioning=Hash([Column { name: "c_nationkey", index: 4 }], 4)) + +Query Stage #13 (4 -> 4): +ShuffleWriterExec(stage_id=13, output_partitioning=Hash([Column { name: "n_regionkey", index: 3 }], 4)) + ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, o_orderdate@3 as o_orderdate, n_regionkey@4 as n_regionkey, n_name@0 as n_name] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[n_name@1, l_extendedprice@2, l_discount@3, o_orderdate@5, n_regionkey@6] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=12, input_partitioning=Hash([Column { name: "s_nationkey", index: 2 }], 4)) + +Query Stage #14 (4 -> 4): +ShuffleWriterExec(stage_id=14, output_partitioning=Hash([Column { name: "o_year", index: 0 }], 4)) + AggregateExec: mode=Partial, gby=[o_year@0 as o_year], aggr=[sum(CASE WHEN all_nations.nation = Utf8("IRAQ") THEN all_nations.volume ELSE Int64(0) END), sum(all_nations.volume)] + ProjectionExec: expr=[date_part(YEAR, o_orderdate@2) as o_year, l_extendedprice@0 * (Some(1),20,0 - l_discount@1) as volume, n_name@3 as nation] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(r_regionkey@0, n_regionkey@3)], projection=[l_extendedprice@1, l_discount@2, o_orderdate@3, n_name@5] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "r_regionkey", index: 0 }], 4)) + 
CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=13, input_partitioning=Hash([Column { name: "n_regionkey", index: 3 }], 4)) + +Query Stage #15 (4 -> 4): +ShuffleWriterExec(stage_id=15, output_partitioning=Hash([Column { name: "o_year", index: 0 }], 4)) + SortExec: expr=[o_year@0 ASC NULLS LAST], preserve_partitioning=[true] + ProjectionExec: expr=[o_year@0 as o_year, sum(CASE WHEN all_nations.nation = Utf8("IRAQ") THEN all_nations.volume ELSE Int64(0) END)@1 / sum(all_nations.volume)@2 as mkt_share] + AggregateExec: mode=FinalPartitioned, gby=[o_year@0 as o_year], aggr=[sum(CASE WHEN all_nations.nation = Utf8("IRAQ") THEN all_nations.volume ELSE Int64(0) END), sum(all_nations.volume)] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=14, input_partitioning=Hash([Column { name: "o_year", index: 0 }], 4)) + +Query Stage #16 (4 -> 1): +SortPreservingMergeExec: [o_year@0 ASC NULLS LAST] + ShuffleReaderExec(stage_id=15, input_partitioning=Hash([Column { name: "o_year", index: 0 }], 4)) diff --git a/testdata/expected-plans/q9.txt b/testdata/expected-plans/q9.txt index ca953ca..5a38167 100644 --- a/testdata/expected-plans/q9.txt +++ b/testdata/expected-plans/q9.txt @@ -27,61 +27,148 @@ Sort: profit.nation ASC NULLS LAST, profit.o_year DESC NULLS FIRST DataFusion Physical Plan ======================== -SortExec: expr=[nation@0 ASC NULLS LAST,o_year@1 DESC], preserve_partitioning=[false] - ProjectionExec: expr=[nation@0 as nation, o_year@1 as o_year, sum(profit.amount)@2 as sum_profit] - AggregateExec: mode=Single, gby=[nation@0 as nation, o_year@1 as o_year], aggr=[sum(profit.amount)] - ProjectionExec: expr=[n_name@0 as nation, date_part(YEAR, o_orderdate@5) as o_year, l_extendedprice@2 * (Some(1),20,0 - l_discount@3) - ps_supplycost@4 * l_quantity@1 as amount] +SortPreservingMergeExec: [nation@0 ASC NULLS LAST,o_year@1 DESC] + SortExec: expr=[nation@0 ASC NULLS LAST,o_year@1 DESC], preserve_partitioning=[true] + 
ProjectionExec: expr=[nation@0 as nation, o_year@1 as o_year, sum(profit.amount)@2 as sum_profit] + AggregateExec: mode=FinalPartitioned, gby=[nation@0 as nation, o_year@1 as o_year], aggr=[sum(profit.amount)] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@3)], projection=[n_name@1, l_quantity@2, l_extendedprice@3, l_discount@4, ps_supplycost@6, o_orderdate@7] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name] - ProjectionExec: expr=[l_quantity@1 as l_quantity, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@4 as s_nationkey, ps_supplycost@5 as ps_supplycost, o_orderdate@0 as o_orderdate] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_orderdate@1, l_quantity@3, l_extendedprice@4, l_discount@5, s_nationkey@6, ps_supplycost@7] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_orderdate] - ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_quantity@2 as l_quantity, l_extendedprice@3 as l_extendedprice, l_discount@4 as l_discount, s_nationkey@5 as s_nationkey, ps_supplycost@0 as ps_supplycost] + RepartitionExec: partitioning=Hash([nation@0, o_year@1], 4), input_partitions=4 + AggregateExec: mode=Partial, gby=[nation@0 as nation, o_year@1 as o_year], aggr=[sum(profit.amount)] + ProjectionExec: expr=[n_name@0 as nation, date_part(YEAR, o_orderdate@5) as o_year, l_extendedprice@2 * (Some(1),20,0 - l_discount@3) - ps_supplycost@4 * l_quantity@1 as amount] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@3)], projection=[n_name@1, l_quantity@2, l_extendedprice@3, l_discount@4, ps_supplycost@6, 
o_orderdate@7] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(ps_suppkey@1, l_suppkey@2), (ps_partkey@0, l_partkey@1)], projection=[ps_supplycost@2, l_orderkey@3, l_quantity@6, l_extendedprice@7, l_discount@8, s_nationkey@9] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet:0..41092331]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] - ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_partkey@2 as l_partkey, l_suppkey@3 as l_suppkey, l_quantity@4 as l_quantity, l_extendedprice@5 as l_extendedprice, l_discount@6 as l_discount, s_nationkey@0 as s_nationkey] + RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=1 + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([s_nationkey@3], 4), input_partitions=4 + ProjectionExec: expr=[l_quantity@1 as l_quantity, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@4 as s_nationkey, ps_supplycost@5 as ps_supplycost, o_orderdate@0 as o_orderdate] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, l_suppkey@2)], projection=[s_nationkey@1, l_orderkey@2, l_partkey@3, l_suppkey@4, l_quantity@5, l_extendedprice@6, l_discount@7] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_orderdate@1, l_quantity@3, l_extendedprice@4, l_discount@5, s_nationkey@6, ps_supplycost@7] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([o_orderkey@0], 4), input_partitions=4 + ParquetExec: file_groups={4 groups: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13634779], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13634779..27269558], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27269558..27275584, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13628753], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13628753..27263530]]}, projection=[o_orderkey, o_orderdate] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, l_partkey@1)], projection=[l_orderkey@1, l_partkey@2, l_suppkey@3, l_quantity@4, l_extendedprice@5, l_discount@6] - ProjectionExec: expr=[p_partkey@0 as p_partkey] + RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4 + ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_quantity@2 as l_quantity, l_extendedprice@3 as l_extendedprice, l_discount@4 as l_discount, s_nationkey@5 as s_nationkey, ps_supplycost@0 as ps_supplycost] CoalesceBatchesExec: target_batch_size=8192 - FilterExec: p_name@1 LIKE %moccasin% - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_name], predicate=p_name@1 LIKE %moccasin% - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_partkey, l_suppkey, l_quantity, l_extendedprice, l_discount] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(ps_suppkey@1, l_suppkey@2), (ps_partkey@0, l_partkey@1)], projection=[ps_supplycost@2, l_orderkey@3, l_quantity@6, l_extendedprice@7, l_discount@8, s_nationkey@9] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([ps_suppkey@1, ps_partkey@0], 4), input_partitions=4 + ParquetExec: file_groups={4 groups: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10445614], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:10445614..20890506, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..722], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:722..10446336], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:10446336..20891947]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([l_suppkey@2, l_partkey@1], 4), input_partitions=4 + ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_partkey@2 as l_partkey, l_suppkey@3 as l_suppkey, l_quantity@4 as l_quantity, l_extendedprice@5 as l_extendedprice, l_discount@6 as l_discount, s_nationkey@0 as s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, l_suppkey@2)], projection=[s_nationkey@1, l_orderkey@2, l_partkey@3, l_suppkey@4, l_quantity@5, l_extendedprice@6, l_discount@7] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=4 + RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([l_suppkey@2], 4), input_partitions=4 + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, l_partkey@1)], projection=[l_orderkey@1, l_partkey@2, l_suppkey@3, l_quantity@4, l_extendedprice@5, l_discount@6] + CoalesceBatchesExec: target_batch_size=8192 + 
RepartitionExec: partitioning=Hash([p_partkey@0], 4), input_partitions=4 + ProjectionExec: expr=[p_partkey@0 as p_partkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: p_name@1 LIKE %moccasin% + RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_name], predicate=p_name@1 LIKE %moccasin% + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([l_partkey@1], 4), input_partitions=4 + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_orderkey, l_partkey, l_suppkey, l_quantity, l_extendedprice, l_discount] RaySQL Plan =========== -Query Stage #0 (1 -> 1): -SortExec: expr=[nation@0 ASC NULLS LAST,o_year@1 DESC], preserve_partitioning=[false] - ProjectionExec: expr=[nation@0 as nation, o_year@1 as o_year, sum(profit.amount)@2 as sum_profit] - AggregateExec: mode=Single, gby=[nation@0 as nation, o_year@1 as o_year], aggr=[sum(profit.amount)] - ProjectionExec: expr=[n_name@0 as nation, date_part(YEAR, o_orderdate@5) as o_year, l_extendedprice@2 * (Some(1),20,0 - l_discount@3) - ps_supplycost@4 * l_quantity@1 as amount] +Query Stage #0 (1 -> 4): +ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) + ParquetExec: file_groups={1 group: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name] + +Query Stage #1 (4 -> 4): +ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13634779], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13634779..27269558], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27269558..27275584, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13628753], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13628753..27263530]]}, projection=[o_orderkey, o_orderdate] + +Query Stage #2 (4 -> 4): +ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "ps_suppkey", index: 1 }, Column { name: "ps_partkey", index: 0 }], 4)) + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10445614], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:10445614..20890506, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..722], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:722..10446336], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:10446336..20891947]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] + +Query Stage #3 (2 -> 4): +ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] + +Query Stage #4 (2 -> 4): 
+ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) + ProjectionExec: expr=[p_partkey@0 as p_partkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: p_name@1 LIKE %moccasin% + ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_name], predicate=p_name@1 LIKE %moccasin% + +Query Stage #5 (4 -> 4): +ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "l_partkey", index: 1 }], 4)) + ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_orderkey, l_partkey, l_suppkey, l_quantity, l_extendedprice, l_discount] + +Query Stage #6 (4 -> 4): +ShuffleWriterExec(stage_id=6, output_partitioning=Hash([Column { name: "l_suppkey", index: 2 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, l_partkey@1)], projection=[l_orderkey@1, l_partkey@2, l_suppkey@3, l_quantity@4, l_extendedprice@5, l_discount@6] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=4, input_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=5, input_partitioning=Hash([Column { name: "l_partkey", index: 1 }], 4)) + +Query Stage #7 (4 -> 4): +ShuffleWriterExec(stage_id=7, 
output_partitioning=Hash([Column { name: "l_suppkey", index: 2 }, Column { name: "l_partkey", index: 1 }], 4)) + ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_partkey@2 as l_partkey, l_suppkey@3 as l_suppkey, l_quantity@4 as l_quantity, l_extendedprice@5 as l_extendedprice, l_discount@6 as l_discount, s_nationkey@0 as s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, l_suppkey@2)], projection=[s_nationkey@1, l_orderkey@2, l_partkey@3, l_suppkey@4, l_quantity@5, l_extendedprice@6, l_discount@7] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@3)], projection=[n_name@1, l_quantity@2, l_extendedprice@3, l_discount@4, ps_supplycost@6, o_orderdate@7] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name] - ProjectionExec: expr=[l_quantity@1 as l_quantity, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@4 as s_nationkey, ps_supplycost@5 as ps_supplycost, o_orderdate@0 as o_orderdate] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_orderdate@1, l_quantity@3, l_extendedprice@4, l_discount@5, s_nationkey@6, ps_supplycost@7] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54531250]]}, projection=[o_orderkey, o_orderdate] - ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_quantity@2 as l_quantity, l_extendedprice@3 as l_extendedprice, l_discount@4 as l_discount, s_nationkey@5 as s_nationkey, ps_supplycost@0 as ps_supplycost] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(ps_suppkey@1, l_suppkey@2), (ps_partkey@0, l_partkey@1)], projection=[ps_supplycost@2, l_orderkey@3, l_quantity@6, 
l_extendedprice@7, l_discount@8, s_nationkey@9] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet:0..41092331]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] - ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_partkey@2 as l_partkey, l_suppkey@3 as l_suppkey, l_quantity@4 as l_quantity, l_extendedprice@5 as l_extendedprice, l_discount@6 as l_discount, s_nationkey@0 as s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, l_suppkey@2)], projection=[s_nationkey@1, l_orderkey@2, l_partkey@3, l_suppkey@4, l_quantity@5, l_extendedprice@6, l_discount@7] - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, l_partkey@1)], projection=[l_orderkey@1, l_partkey@2, l_suppkey@3, l_quantity@4, l_extendedprice@5, l_discount@6] - ProjectionExec: expr=[p_partkey@0 as p_partkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: p_name@1 LIKE %moccasin% - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_name], predicate=p_name@1 LIKE %moccasin% - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208341192]]}, projection=[l_orderkey, l_partkey, l_suppkey, l_quantity, l_extendedprice, l_discount] + ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=6, input_partitioning=Hash([Column { name: "l_suppkey", index: 2 }], 4)) + +Query Stage #8 (4 -> 4): +ShuffleWriterExec(stage_id=8, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) + ProjectionExec: 
expr=[l_orderkey@1 as l_orderkey, l_quantity@2 as l_quantity, l_extendedprice@3 as l_extendedprice, l_discount@4 as l_discount, s_nationkey@5 as s_nationkey, ps_supplycost@0 as ps_supplycost] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(ps_suppkey@1, l_suppkey@2), (ps_partkey@0, l_partkey@1)], projection=[ps_supplycost@2, l_orderkey@3, l_quantity@6, l_extendedprice@7, l_discount@8, s_nationkey@9] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "ps_suppkey", index: 1 }, Column { name: "ps_partkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=7, input_partitioning=Hash([Column { name: "l_suppkey", index: 2 }, Column { name: "l_partkey", index: 1 }], 4)) + +Query Stage #9 (4 -> 4): +ShuffleWriterExec(stage_id=9, output_partitioning=Hash([Column { name: "s_nationkey", index: 3 }], 4)) + ProjectionExec: expr=[l_quantity@1 as l_quantity, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@4 as s_nationkey, ps_supplycost@5 as ps_supplycost, o_orderdate@0 as o_orderdate] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_orderdate@1, l_quantity@3, l_extendedprice@4, l_discount@5, s_nationkey@6, ps_supplycost@7] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=8, input_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) + +Query Stage #10 (4 -> 4): +ShuffleWriterExec(stage_id=10, output_partitioning=Hash([Column { name: "nation", index: 0 }, Column { name: "o_year", index: 1 }], 4)) + AggregateExec: mode=Partial, gby=[nation@0 as nation, o_year@1 as o_year], aggr=[sum(profit.amount)] + 
ProjectionExec: expr=[n_name@0 as nation, date_part(YEAR, o_orderdate@5) as o_year, l_extendedprice@2 * (Some(1),20,0 - l_discount@3) - ps_supplycost@4 * l_quantity@1 as amount] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@3)], projection=[n_name@1, l_quantity@2, l_extendedprice@3, l_discount@4, ps_supplycost@6, o_orderdate@7] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=9, input_partitioning=Hash([Column { name: "s_nationkey", index: 3 }], 4)) + +Query Stage #11 (4 -> 4): +ShuffleWriterExec(stage_id=11, output_partitioning=Hash([Column { name: "nation", index: 0 }, Column { name: "o_year", index: 1 }], 4)) + SortExec: expr=[nation@0 ASC NULLS LAST,o_year@1 DESC], preserve_partitioning=[true] + ProjectionExec: expr=[nation@0 as nation, o_year@1 as o_year, sum(profit.amount)@2 as sum_profit] + AggregateExec: mode=FinalPartitioned, gby=[nation@0 as nation, o_year@1 as o_year], aggr=[sum(profit.amount)] + CoalesceBatchesExec: target_batch_size=8192 + ShuffleReaderExec(stage_id=10, input_partitioning=Hash([Column { name: "nation", index: 0 }, Column { name: "o_year", index: 1 }], 4)) + +Query Stage #12 (4 -> 1): +SortPreservingMergeExec: [nation@0 ASC NULLS LAST,o_year@1 DESC] + ShuffleReaderExec(stage_id=11, input_partitioning=Hash([Column { name: "nation", index: 0 }, Column { name: "o_year", index: 1 }], 4)) From e7a052d7aa7b65aade0178a469b3e9e498dc3d93 Mon Sep 17 00:00:00 2001 From: Edmondo Porcu Date: Thu, 3 Oct 2024 17:32:10 -0400 Subject: [PATCH 09/14] Restored partiition count --- .github/workflows/rust.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 7ac8057..329df75 100644 --- a/.github/workflows/rust.yml +++ 
b/.github/workflows/rust.yml @@ -8,7 +8,7 @@ env: CARGO_TERM_COLOR: always PYTHON_VERSION: 3.9 TPCH_SCALING_FACTOR: "1" - TPCH_TEST_PARTITIONS: "1" + TPCH_TEST_PARTITIONS: "2" TPCH_DATA_PATH: "data" jobs: From 30963ea5e8fb2925df98236c261873bffa58b8e1 Mon Sep 17 00:00:00 2001 From: Edmondo Porcu Date: Thu, 3 Oct 2024 18:02:34 -0400 Subject: [PATCH 10/14] Will tests eventually pass? --- .github/workflows/rust.yml | 2 +- src/planner.rs | 2 +- testdata/expected-plans/q1.txt | 41 ++--- testdata/expected-plans/q10.txt | 137 +++++---------- testdata/expected-plans/q11.txt | 191 ++++++--------------- testdata/expected-plans/q12.txt | 71 +++----- testdata/expected-plans/q13.txt | 79 +++------ testdata/expected-plans/q14.txt | 59 ++----- testdata/expected-plans/q16.txt | 120 ++++--------- testdata/expected-plans/q17.txt | 82 +++------ testdata/expected-plans/q18.txt | 106 +++--------- testdata/expected-plans/q19.txt | 61 +++---- testdata/expected-plans/q2.txt | 292 +++++++++----------------------- testdata/expected-plans/q20.txt | 155 +++++------------ testdata/expected-plans/q21.txt | 196 +++++++-------------- testdata/expected-plans/q22.txt | 103 ++++------- testdata/expected-plans/q3.txt | 113 ++++-------- testdata/expected-plans/q4.txt | 83 +++------ testdata/expected-plans/q5.txt | 191 ++++++--------------- testdata/expected-plans/q6.txt | 26 +-- testdata/expected-plans/q7.txt | 196 ++++++--------------- testdata/expected-plans/q8.txt | 264 ++++++++--------------------- testdata/expected-plans/q9.txt | 181 +++++--------------- 23 files changed, 785 insertions(+), 1966 deletions(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 329df75..7ac8057 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -8,7 +8,7 @@ env: CARGO_TERM_COLOR: always PYTHON_VERSION: 3.9 TPCH_SCALING_FACTOR: "1" - TPCH_TEST_PARTITIONS: "2" + TPCH_TEST_PARTITIONS: "1" TPCH_DATA_PATH: "data" jobs: diff --git a/src/planner.rs b/src/planner.rs index 
5c58663..40934fe 100644 --- a/src/planner.rs +++ b/src/planner.rs @@ -384,7 +384,7 @@ mod test { let data_path = env::var("TPCH_DATA_PATH")?; let file = format!("testdata/queries/q{n}.sql"); let sql = fs::read_to_string(&file)?; - let config = SessionConfig::new().with_target_partitions(4); + let config = SessionConfig::new().with_target_partitions(1); let ctx = SessionContext::with_config(config); let tables = &[ "customer", "lineitem", "nation", "orders", "part", "partsupp", "region", "supplier", diff --git a/testdata/expected-plans/q1.txt b/testdata/expected-plans/q1.txt index d0b5599..f37ef3a 100644 --- a/testdata/expected-plans/q1.txt +++ b/testdata/expected-plans/q1.txt @@ -11,38 +11,23 @@ Sort: lineitem.l_returnflag ASC NULLS LAST, lineitem.l_linestatus ASC NULLS LAST DataFusion Physical Plan ======================== -SortPreservingMergeExec: [l_returnflag@0 ASC NULLS LAST,l_linestatus@1 ASC NULLS LAST] - SortExec: expr=[l_returnflag@0 ASC NULLS LAST,l_linestatus@1 ASC NULLS LAST], preserve_partitioning=[true] - ProjectionExec: expr=[l_returnflag@0 as l_returnflag, l_linestatus@1 as l_linestatus, sum(lineitem.l_quantity)@2 as sum_qty, sum(lineitem.l_extendedprice)@3 as sum_base_price, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@4 as sum_disc_price, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax)@5 as sum_charge, avg(lineitem.l_quantity)@6 as avg_qty, avg(lineitem.l_extendedprice)@7 as avg_price, avg(lineitem.l_discount)@8 as avg_disc, count(*)@9 as count_order] - AggregateExec: mode=FinalPartitioned, gby=[l_returnflag@0 as l_returnflag, l_linestatus@1 as l_linestatus], aggr=[sum(lineitem.l_quantity), sum(lineitem.l_extendedprice), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax), avg(lineitem.l_quantity), avg(lineitem.l_extendedprice), avg(lineitem.l_discount), count(*)] +SortExec: 
expr=[l_returnflag@0 ASC NULLS LAST,l_linestatus@1 ASC NULLS LAST], preserve_partitioning=[false] + ProjectionExec: expr=[l_returnflag@0 as l_returnflag, l_linestatus@1 as l_linestatus, sum(lineitem.l_quantity)@2 as sum_qty, sum(lineitem.l_extendedprice)@3 as sum_base_price, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@4 as sum_disc_price, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax)@5 as sum_charge, avg(lineitem.l_quantity)@6 as avg_qty, avg(lineitem.l_extendedprice)@7 as avg_price, avg(lineitem.l_discount)@8 as avg_disc, count(*)@9 as count_order] + AggregateExec: mode=Single, gby=[l_returnflag@5 as l_returnflag, l_linestatus@6 as l_linestatus], aggr=[sum(lineitem.l_quantity), sum(lineitem.l_extendedprice), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax), avg(lineitem.l_quantity), avg(lineitem.l_extendedprice), avg(lineitem.l_discount), count(*)] + ProjectionExec: expr=[l_extendedprice@1 * (Some(1),20,0 - l_discount@2) as __common_expr_2, l_quantity@0 as l_quantity, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, l_tax@3 as l_tax, l_returnflag@4 as l_returnflag, l_linestatus@5 as l_linestatus] CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([l_returnflag@0, l_linestatus@1], 4), input_partitions=4 - AggregateExec: mode=Partial, gby=[l_returnflag@5 as l_returnflag, l_linestatus@6 as l_linestatus], aggr=[sum(lineitem.l_quantity), sum(lineitem.l_extendedprice), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax), avg(lineitem.l_quantity), avg(lineitem.l_extendedprice), avg(lineitem.l_discount), count(*)] - ProjectionExec: expr=[l_extendedprice@1 * (Some(1),20,0 - l_discount@2) as __common_expr_2, l_quantity@0 as l_quantity, l_extendedprice@1 as 
l_extendedprice, l_discount@2 as l_discount, l_tax@3 as l_tax, l_returnflag@4 as l_returnflag, l_linestatus@5 as l_linestatus] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_shipdate@6 <= 1998-09-24 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate], predicate=l_shipdate@10 <= 1998-09-24, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@0 <= 1998-09-24 END, required_guarantees=[] + FilterExec: l_shipdate@6 <= 1998-09-24 + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208317955]]}, projection=[l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate], predicate=l_shipdate@10 <= 1998-09-24, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@0 <= 1998-09-24 END, required_guarantees=[] RaySQL Plan =========== -Query Stage #0 (4 -> 4): -ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "l_returnflag", index: 0 }, Column { name: "l_linestatus", index: 1 }], 4)) - AggregateExec: mode=Partial, gby=[l_returnflag@5 as l_returnflag, l_linestatus@6 as l_linestatus], aggr=[sum(lineitem.l_quantity), sum(lineitem.l_extendedprice), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * 
Int64(1) + lineitem.l_tax), avg(lineitem.l_quantity), avg(lineitem.l_extendedprice), avg(lineitem.l_discount), count(*)] - ProjectionExec: expr=[l_extendedprice@1 * (Some(1),20,0 - l_discount@2) as __common_expr_2, l_quantity@0 as l_quantity, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, l_tax@3 as l_tax, l_returnflag@4 as l_returnflag, l_linestatus@5 as l_linestatus] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_shipdate@6 <= 1998-09-24 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate], predicate=l_shipdate@10 <= 1998-09-24, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@0 <= 1998-09-24 END, required_guarantees=[] - -Query Stage #1 (4 -> 4): -ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "l_returnflag", index: 0 }, Column { name: "l_linestatus", index: 1 }], 4)) - SortExec: expr=[l_returnflag@0 ASC NULLS LAST,l_linestatus@1 ASC NULLS LAST], preserve_partitioning=[true] - ProjectionExec: expr=[l_returnflag@0 as l_returnflag, l_linestatus@1 as l_linestatus, sum(lineitem.l_quantity)@2 as sum_qty, sum(lineitem.l_extendedprice)@3 as sum_base_price, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@4 as sum_disc_price, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax)@5 as sum_charge, avg(lineitem.l_quantity)@6 
as avg_qty, avg(lineitem.l_extendedprice)@7 as avg_price, avg(lineitem.l_discount)@8 as avg_disc, count(*)@9 as count_order] - AggregateExec: mode=FinalPartitioned, gby=[l_returnflag@0 as l_returnflag, l_linestatus@1 as l_linestatus], aggr=[sum(lineitem.l_quantity), sum(lineitem.l_extendedprice), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax), avg(lineitem.l_quantity), avg(lineitem.l_extendedprice), avg(lineitem.l_discount), count(*)] +Query Stage #0 (1 -> 1): +SortExec: expr=[l_returnflag@0 ASC NULLS LAST,l_linestatus@1 ASC NULLS LAST], preserve_partitioning=[false] + ProjectionExec: expr=[l_returnflag@0 as l_returnflag, l_linestatus@1 as l_linestatus, sum(lineitem.l_quantity)@2 as sum_qty, sum(lineitem.l_extendedprice)@3 as sum_base_price, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@4 as sum_disc_price, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax)@5 as sum_charge, avg(lineitem.l_quantity)@6 as avg_qty, avg(lineitem.l_extendedprice)@7 as avg_price, avg(lineitem.l_discount)@8 as avg_disc, count(*)@9 as count_order] + AggregateExec: mode=Single, gby=[l_returnflag@5 as l_returnflag, l_linestatus@6 as l_linestatus], aggr=[sum(lineitem.l_quantity), sum(lineitem.l_extendedprice), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax), avg(lineitem.l_quantity), avg(lineitem.l_extendedprice), avg(lineitem.l_discount), count(*)] + ProjectionExec: expr=[l_extendedprice@1 * (Some(1),20,0 - l_discount@2) as __common_expr_2, l_quantity@0 as l_quantity, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, l_tax@3 as l_tax, l_returnflag@4 as l_returnflag, l_linestatus@5 as l_linestatus] CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { 
name: "l_returnflag", index: 0 }, Column { name: "l_linestatus", index: 1 }], 4)) - -Query Stage #2 (4 -> 1): -SortPreservingMergeExec: [l_returnflag@0 ASC NULLS LAST,l_linestatus@1 ASC NULLS LAST] - ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "l_returnflag", index: 0 }, Column { name: "l_linestatus", index: 1 }], 4)) + FilterExec: l_shipdate@6 <= 1998-09-24 + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208317955]]}, projection=[l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate], predicate=l_shipdate@10 <= 1998-09-24, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@0 <= 1998-09-24 END, required_guarantees=[] diff --git a/testdata/expected-plans/q10.txt b/testdata/expected-plans/q10.txt index 08ebdfc..e9e4c67 100644 --- a/testdata/expected-plans/q10.txt +++ b/testdata/expected-plans/q10.txt @@ -23,108 +23,49 @@ Limit: skip=0, fetch=20 DataFusion Physical Plan ======================== -GlobalLimitExec: skip=0, fetch=20 - SortPreservingMergeExec: [revenue@2 DESC], fetch=20 - SortExec: TopK(fetch=20), expr=[revenue@2 DESC], preserve_partitioning=[true] - ProjectionExec: expr=[c_custkey@0 as c_custkey, c_name@1 as c_name, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@7 as revenue, c_acctbal@2 as c_acctbal, n_name@4 as n_name, c_address@5 as c_address, c_phone@3 as c_phone, c_comment@6 as c_comment] - AggregateExec: mode=FinalPartitioned, gby=[c_custkey@0 as c_custkey, c_name@1 as c_name, c_acctbal@2 as c_acctbal, c_phone@3 as c_phone, n_name@4 as n_name, c_address@5 as c_address, c_comment@6 as c_comment], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([c_custkey@0, c_name@1, c_acctbal@2, c_phone@3, n_name@4, c_address@5, c_comment@6], 4), input_partitions=4 - 
AggregateExec: mode=Partial, gby=[c_custkey@0 as c_custkey, c_name@1 as c_name, c_acctbal@4 as c_acctbal, c_phone@3 as c_phone, n_name@8 as n_name, c_address@2 as c_address, c_comment@5 as c_comment], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] - ProjectionExec: expr=[c_custkey@1 as c_custkey, c_name@2 as c_name, c_address@3 as c_address, c_phone@4 as c_phone, c_acctbal@5 as c_acctbal, c_comment@6 as c_comment, l_extendedprice@7 as l_extendedprice, l_discount@8 as l_discount, n_name@0 as n_name] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, c_nationkey@3)], projection=[n_name@1, c_custkey@2, c_name@3, c_address@4, c_phone@6, c_acctbal@7, c_comment@8, l_extendedprice@9, l_discount@10] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=1 - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name] +SortExec: TopK(fetch=20), expr=[revenue@2 DESC], preserve_partitioning=[false] + ProjectionExec: expr=[c_custkey@0 as c_custkey, c_name@1 as c_name, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@7 as revenue, c_acctbal@2 as c_acctbal, n_name@4 as n_name, c_address@5 as c_address, c_phone@3 as c_phone, c_comment@6 as c_comment] + AggregateExec: mode=Single, gby=[c_custkey@0 as c_custkey, c_name@1 as c_name, c_acctbal@4 as c_acctbal, c_phone@3 as c_phone, n_name@8 as n_name, c_address@2 as c_address, c_comment@5 as c_comment], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] + ProjectionExec: expr=[c_custkey@1 as c_custkey, c_name@2 as c_name, c_address@3 as c_address, c_phone@4 as c_phone, c_acctbal@5 as c_acctbal, c_comment@6 as c_comment, l_extendedprice@7 as l_extendedprice, l_discount@8 as l_discount, n_name@0 as n_name] + CoalesceBatchesExec: target_batch_size=8192 + 
HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, c_nationkey@3)], projection=[n_name@1, c_custkey@2, c_name@3, c_address@4, c_phone@6, c_acctbal@7, c_comment@8, l_extendedprice@9, l_discount@10] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(o_orderkey@7, l_orderkey@0)], projection=[c_custkey@0, c_name@1, c_address@2, c_nationkey@3, c_phone@4, c_acctbal@5, c_comment@6, l_extendedprice@9, l_discount@10] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, c_name@1, c_address@2, c_nationkey@3, c_phone@4, c_acctbal@5, c_comment@6, o_orderkey@7] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13211178]]}, projection=[c_custkey, c_name, c_address, c_nationkey, c_phone, c_acctbal, c_comment] + ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([c_nationkey@3], 4), input_partitions=4 - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@7, l_orderkey@0)], projection=[c_custkey@0, c_name@1, c_address@2, c_nationkey@3, c_phone@4, c_acctbal@5, c_comment@6, l_extendedprice@9, l_discount@10] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([o_orderkey@7], 4), input_partitions=4 - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, c_name@1, c_address@2, c_nationkey@3, c_phone@4, c_acctbal@5, c_comment@6, o_orderkey@7] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: 
partitioning=Hash([c_custkey@0], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..3390424], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:3390424..6780848], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6780848..6782476, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..3388796], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:3388796..6779220]]}, projection=[c_custkey, c_name, c_address, c_nationkey, c_phone, c_acctbal, c_comment] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([o_custkey@1], 4), input_partitions=4 - ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: o_orderdate@2 >= 1993-07-01 AND o_orderdate@2 < 1993-10-01 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13634779], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13634779..27269558], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27269558..27275584, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13628753], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13628753..27263530]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1993-07-01 AND o_orderdate@4 < 1993-10-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1993-07-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1993-10-01 END, required_guarantees=[] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: 
partitioning=Hash([l_orderkey@0], 4), input_partitions=4 - ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_returnflag@3 = R - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_orderkey, l_extendedprice, l_discount, l_returnflag], predicate=l_returnflag@8 = R, pruning_predicate=CASE WHEN l_returnflag_null_count@2 = l_returnflag_row_count@3 THEN false ELSE l_returnflag_min@0 <= R AND R <= l_returnflag_max@1 END, required_guarantees=[l_returnflag in (R)] + FilterExec: o_orderdate@2 >= 1993-07-01 AND o_orderdate@2 < 1993-10-01 + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54530383]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1993-07-01 AND o_orderdate@4 < 1993-10-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1993-07-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1993-10-01 END, required_guarantees=[] + ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: l_returnflag@3 = R + ParquetExec: file_groups={1 group: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208317955]]}, projection=[l_orderkey, l_extendedprice, l_discount, l_returnflag], predicate=l_returnflag@8 = R, pruning_predicate=CASE WHEN l_returnflag_null_count@2 = l_returnflag_row_count@3 THEN false ELSE l_returnflag_min@0 <= R AND R <= l_returnflag_max@1 END, required_guarantees=[l_returnflag in (R)] RaySQL Plan =========== -Query Stage #0 (1 -> 4): -ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name] - -Query Stage #1 (4 -> 4): -ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..3390424], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:3390424..6780848], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6780848..6782476, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..3388796], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:3388796..6779220]]}, projection=[c_custkey, c_name, c_address, c_nationkey, c_phone, c_acctbal, c_comment] - -Query Stage #2 (4 -> 4): -ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "o_custkey", index: 1 }], 4)) - ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: o_orderdate@2 >= 1993-07-01 AND o_orderdate@2 < 1993-10-01 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13634779], 
[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13634779..27269558], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27269558..27275584, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13628753], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13628753..27263530]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1993-07-01 AND o_orderdate@4 < 1993-10-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1993-07-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1993-10-01 END, required_guarantees=[] - -Query Stage #3 (4 -> 4): -ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "o_orderkey", index: 7 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, c_name@1, c_address@2, c_nationkey@3, c_phone@4, c_acctbal@5, c_comment@6, o_orderkey@7] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "o_custkey", index: 1 }], 4)) - -Query Stage #4 (4 -> 4): -ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) - ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_returnflag@3 = R - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], 
[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_orderkey, l_extendedprice, l_discount, l_returnflag], predicate=l_returnflag@8 = R, pruning_predicate=CASE WHEN l_returnflag_null_count@2 = l_returnflag_row_count@3 THEN false ELSE l_returnflag_min@0 <= R AND R <= l_returnflag_max@1 END, required_guarantees=[l_returnflag in (R)] - -Query Stage #5 (4 -> 4): -ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "c_nationkey", index: 3 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@7, l_orderkey@0)], projection=[c_custkey@0, c_name@1, c_address@2, c_nationkey@3, c_phone@4, c_acctbal@5, c_comment@6, l_extendedprice@9, l_discount@10] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "o_orderkey", index: 7 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=4, input_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) - -Query Stage #6 (4 -> 4): -ShuffleWriterExec(stage_id=6, output_partitioning=Hash([Column { name: "c_custkey", index: 0 }, Column { name: "c_name", index: 1 }, Column { name: "c_acctbal", index: 2 }, Column { name: "c_phone", index: 3 }, Column { name: "n_name", index: 4 }, Column { name: "c_address", index: 5 }, Column { name: "c_comment", index: 6 }], 4)) - AggregateExec: mode=Partial, gby=[c_custkey@0 as c_custkey, c_name@1 as c_name, c_acctbal@4 as c_acctbal, c_phone@3 as c_phone, n_name@8 as n_name, c_address@2 as c_address, c_comment@5 as c_comment], aggr=[sum(lineitem.l_extendedprice 
* Int64(1) - lineitem.l_discount)] - ProjectionExec: expr=[c_custkey@1 as c_custkey, c_name@2 as c_name, c_address@3 as c_address, c_phone@4 as c_phone, c_acctbal@5 as c_acctbal, c_comment@6 as c_comment, l_extendedprice@7 as l_extendedprice, l_discount@8 as l_discount, n_name@0 as n_name] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, c_nationkey@3)], projection=[n_name@1, c_custkey@2, c_name@3, c_address@4, c_phone@6, c_acctbal@7, c_comment@8, l_extendedprice@9, l_discount@10] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=5, input_partitioning=Hash([Column { name: "c_nationkey", index: 3 }], 4)) - -Query Stage #7 (4 -> 4): -ShuffleWriterExec(stage_id=7, output_partitioning=Hash([Column { name: "c_custkey", index: 0 }, Column { name: "c_name", index: 1 }, Column { name: "c_acctbal", index: 3 }, Column { name: "c_phone", index: 6 }, Column { name: "n_name", index: 4 }, Column { name: "c_address", index: 5 }, Column { name: "c_comment", index: 7 }], 4)) - SortExec: TopK(fetch=20), expr=[revenue@2 DESC], preserve_partitioning=[true] - ProjectionExec: expr=[c_custkey@0 as c_custkey, c_name@1 as c_name, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@7 as revenue, c_acctbal@2 as c_acctbal, n_name@4 as n_name, c_address@5 as c_address, c_phone@3 as c_phone, c_comment@6 as c_comment] - AggregateExec: mode=FinalPartitioned, gby=[c_custkey@0 as c_custkey, c_name@1 as c_name, c_acctbal@2 as c_acctbal, c_phone@3 as c_phone, n_name@4 as n_name, c_address@5 as c_address, c_comment@6 as c_comment], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] +Query Stage #0 (1 -> 1): +SortExec: TopK(fetch=20), expr=[revenue@2 DESC], preserve_partitioning=[false] + ProjectionExec: expr=[c_custkey@0 as 
c_custkey, c_name@1 as c_name, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@7 as revenue, c_acctbal@2 as c_acctbal, n_name@4 as n_name, c_address@5 as c_address, c_phone@3 as c_phone, c_comment@6 as c_comment] + AggregateExec: mode=Single, gby=[c_custkey@0 as c_custkey, c_name@1 as c_name, c_acctbal@4 as c_acctbal, c_phone@3 as c_phone, n_name@8 as n_name, c_address@2 as c_address, c_comment@5 as c_comment], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] + ProjectionExec: expr=[c_custkey@1 as c_custkey, c_name@2 as c_name, c_address@3 as c_address, c_phone@4 as c_phone, c_acctbal@5 as c_acctbal, c_comment@6 as c_comment, l_extendedprice@7 as l_extendedprice, l_discount@8 as l_discount, n_name@0 as n_name] CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=6, input_partitioning=Hash([Column { name: "c_custkey", index: 0 }, Column { name: "c_name", index: 1 }, Column { name: "c_acctbal", index: 2 }, Column { name: "c_phone", index: 3 }, Column { name: "n_name", index: 4 }, Column { name: "c_address", index: 5 }, Column { name: "c_comment", index: 6 }], 4)) - -Query Stage #8 (1 -> 1): -GlobalLimitExec: skip=0, fetch=20 - SortPreservingMergeExec: [revenue@2 DESC], fetch=20 - ShuffleReaderExec(stage_id=7, input_partitioning=Hash([Column { name: "c_custkey", index: 0 }, Column { name: "c_name", index: 1 }, Column { name: "c_acctbal", index: 3 }, Column { name: "c_phone", index: 6 }, Column { name: "n_name", index: 4 }, Column { name: "c_address", index: 5 }, Column { name: "c_comment", index: 7 }], 4)) + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, c_nationkey@3)], projection=[n_name@1, c_custkey@2, c_name@3, c_address@4, c_phone@6, c_acctbal@7, c_comment@8, l_extendedprice@9, l_discount@10] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name] + CoalesceBatchesExec: target_batch_size=8192 + 
HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(o_orderkey@7, l_orderkey@0)], projection=[c_custkey@0, c_name@1, c_address@2, c_nationkey@3, c_phone@4, c_acctbal@5, c_comment@6, l_extendedprice@9, l_discount@10] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, c_name@1, c_address@2, c_nationkey@3, c_phone@4, c_acctbal@5, c_comment@6, o_orderkey@7] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13211178]]}, projection=[c_custkey, c_name, c_address, c_nationkey, c_phone, c_acctbal, c_comment] + ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: o_orderdate@2 >= 1993-07-01 AND o_orderdate@2 < 1993-10-01 + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54530383]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1993-07-01 AND o_orderdate@4 < 1993-10-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1993-07-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1993-10-01 END, required_guarantees=[] + ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: l_returnflag@3 = R + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208317955]]}, projection=[l_orderkey, l_extendedprice, l_discount, l_returnflag], predicate=l_returnflag@8 = R, pruning_predicate=CASE WHEN l_returnflag_null_count@2 = l_returnflag_row_count@3 THEN false ELSE l_returnflag_min@0 <= R AND R <= l_returnflag_max@1 END, required_guarantees=[l_returnflag in (R)] 
diff --git a/testdata/expected-plans/q11.txt b/testdata/expected-plans/q11.txt index b32d75a..8da9394 100644 --- a/testdata/expected-plans/q11.txt +++ b/testdata/expected-plans/q11.txt @@ -30,148 +30,65 @@ Sort: value DESC NULLS FIRST DataFusion Physical Plan ======================== -SortPreservingMergeExec: [value@1 DESC] - SortExec: expr=[value@1 DESC], preserve_partitioning=[true] - ProjectionExec: expr=[ps_partkey@1 as ps_partkey, sum(partsupp.ps_supplycost * partsupp.ps_availqty)@2 as value] - NestedLoopJoinExec: join_type=Inner, filter=CAST(sum(partsupp.ps_supplycost * partsupp.ps_availqty)@0 AS Decimal128(38, 15)) > sum(partsupp.ps_supplycost * partsupp.ps_availqty) * Float64(0.0001)@1 - ProjectionExec: expr=[CAST(CAST(sum(partsupp.ps_supplycost * partsupp.ps_availqty)@0 AS Float64) * 0.0001 AS Decimal128(38, 15)) as sum(partsupp.ps_supplycost * partsupp.ps_availqty) * Float64(0.0001)] - AggregateExec: mode=Final, gby=[], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] - CoalescePartitionsExec - AggregateExec: mode=Partial, gby=[], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[ps_availqty@1, ps_supplycost@2] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=4 - RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 - ProjectionExec: expr=[n_nationkey@0 as n_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: n_name@1 = ALGERIA - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ALGERIA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ALGERIA AND ALGERIA <= n_name_max@1 END, required_guarantees=[n_name in 
(ALGERIA)] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([s_nationkey@2], 4), input_partitions=4 - ProjectionExec: expr=[ps_availqty@1 as ps_availqty, ps_supplycost@2 as ps_supplycost, s_nationkey@0 as s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@0)], projection=[s_nationkey@1, ps_availqty@3, ps_supplycost@4] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=4 - RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([ps_suppkey@0], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10445614], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:10445614..20890506, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..722], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:722..10446336], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:10446336..20891947]]}, projection=[ps_suppkey, ps_availqty, ps_supplycost] - AggregateExec: mode=FinalPartitioned, gby=[ps_partkey@0 as ps_partkey], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] +SortExec: expr=[value@1 DESC], preserve_partitioning=[false] + ProjectionExec: expr=[ps_partkey@1 as ps_partkey, sum(partsupp.ps_supplycost * partsupp.ps_availqty)@2 as value] + NestedLoopJoinExec: join_type=Inner, filter=CAST(sum(partsupp.ps_supplycost * 
partsupp.ps_availqty)@0 AS Decimal128(38, 15)) > sum(partsupp.ps_supplycost * partsupp.ps_availqty) * Float64(0.0001)@1 + ProjectionExec: expr=[CAST(CAST(sum(partsupp.ps_supplycost * partsupp.ps_availqty)@0 AS Float64) * 0.0001 AS Decimal128(38, 15)) as sum(partsupp.ps_supplycost * partsupp.ps_availqty) * Float64(0.0001)] + AggregateExec: mode=Single, gby=[], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([ps_partkey@0], 4), input_partitions=4 - AggregateExec: mode=Partial, gby=[ps_partkey@0 as ps_partkey], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[ps_availqty@1, ps_supplycost@2] + ProjectionExec: expr=[n_nationkey@0 as n_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: n_name@1 = ALGERIA + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ALGERIA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ALGERIA AND ALGERIA <= n_name_max@1 END, required_guarantees=[n_name in (ALGERIA)] + ProjectionExec: expr=[ps_availqty@1 as ps_availqty, ps_supplycost@2 as ps_supplycost, s_nationkey@0 as s_nationkey] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@3)], projection=[ps_partkey@1, ps_availqty@2, ps_supplycost@3] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=4 - RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 - ProjectionExec: expr=[n_nationkey@0 as n_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: n_name@1 = ALGERIA - ParquetExec: file_groups={1 group: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ALGERIA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ALGERIA AND ALGERIA <= n_name_max@1 END, required_guarantees=[n_name in (ALGERIA)] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([s_nationkey@3], 4), input_partitions=4 - ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_availqty@2 as ps_availqty, ps_supplycost@3 as ps_supplycost, s_nationkey@0 as s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@1)], projection=[s_nationkey@1, ps_partkey@2, ps_availqty@4, ps_supplycost@5] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=4 - RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([ps_suppkey@1], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10445614], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:10445614..20890506, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..722], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:722..10446336], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:10446336..20891947]]}, projection=[ps_partkey, ps_suppkey, ps_availqty, ps_supplycost] + HashJoinExec: 
mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@0)], projection=[s_nationkey@1, ps_availqty@3, ps_supplycost@4] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet:0..41091907]]}, projection=[ps_suppkey, ps_availqty, ps_supplycost] + AggregateExec: mode=Single, gby=[ps_partkey@0 as ps_partkey], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@3)], projection=[ps_partkey@1, ps_availqty@2, ps_supplycost@3] + ProjectionExec: expr=[n_nationkey@0 as n_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: n_name@1 = ALGERIA + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ALGERIA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ALGERIA AND ALGERIA <= n_name_max@1 END, required_guarantees=[n_name in (ALGERIA)] + ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_availqty@2 as ps_availqty, ps_supplycost@3 as ps_supplycost, s_nationkey@0 as s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@1)], projection=[s_nationkey@1, ps_partkey@2, ps_availqty@4, ps_supplycost@5] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet:0..41091907]]}, projection=[ps_partkey, ps_suppkey, ps_availqty, ps_supplycost] RaySQL Plan =========== -Query Stage #0 (1 -> 4): 
-ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) - ProjectionExec: expr=[n_nationkey@0 as n_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: n_name@1 = ALGERIA - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ALGERIA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ALGERIA AND ALGERIA <= n_name_max@1 END, required_guarantees=[n_name in (ALGERIA)] - -Query Stage #1 (2 -> 4): -ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] - -Query Stage #2 (4 -> 4): -ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "ps_suppkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10445614], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:10445614..20890506, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..722], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:722..10446336], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:10446336..20891947]]}, projection=[ps_suppkey, ps_availqty, ps_supplycost] - -Query Stage #3 (4 -> 4): -ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "s_nationkey", index: 2 }], 4)) - ProjectionExec: expr=[ps_availqty@1 as ps_availqty, ps_supplycost@2 as ps_supplycost, s_nationkey@0 as s_nationkey] - CoalesceBatchesExec: 
target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@0)], projection=[s_nationkey@1, ps_availqty@3, ps_supplycost@4] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "ps_suppkey", index: 0 }], 4)) - -Query Stage #4 (4 -> 1): -ShuffleWriterExec(stage_id=4, output_partitioning=Hash([], 4)) - AggregateExec: mode=Partial, gby=[], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[ps_availqty@1, ps_supplycost@2] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "s_nationkey", index: 2 }], 4)) - -Query Stage #5 (1 -> 4): -ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) - ProjectionExec: expr=[n_nationkey@0 as n_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: n_name@1 = ALGERIA - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ALGERIA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ALGERIA AND ALGERIA <= n_name_max@1 END, required_guarantees=[n_name in (ALGERIA)] - -Query Stage #6 (2 -> 4): -ShuffleWriterExec(stage_id=6, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) - ParquetExec: file_groups={2 groups: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] - -Query Stage #7 (4 -> 4): -ShuffleWriterExec(stage_id=7, output_partitioning=Hash([Column { name: "ps_suppkey", index: 1 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10445614], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:10445614..20890506, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..722], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:722..10446336], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:10446336..20891947]]}, projection=[ps_partkey, ps_suppkey, ps_availqty, ps_supplycost] - -Query Stage #8 (4 -> 4): -ShuffleWriterExec(stage_id=8, output_partitioning=Hash([Column { name: "s_nationkey", index: 3 }], 4)) - ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_availqty@2 as ps_availqty, ps_supplycost@3 as ps_supplycost, s_nationkey@0 as s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@1)], projection=[s_nationkey@1, ps_partkey@2, ps_availqty@4, ps_supplycost@5] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=6, input_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=7, input_partitioning=Hash([Column { name: "ps_suppkey", index: 1 }], 4)) - -Query Stage #9 (4 -> 4): -ShuffleWriterExec(stage_id=9, output_partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 4)) - AggregateExec: mode=Partial, gby=[ps_partkey@0 as ps_partkey], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] - 
CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@3)], projection=[ps_partkey@1, ps_availqty@2, ps_supplycost@3] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=5, input_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=8, input_partitioning=Hash([Column { name: "s_nationkey", index: 3 }], 4)) - -Query Stage #10 (4 -> 4): -ShuffleWriterExec(stage_id=10, output_partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 4)) - SortExec: expr=[value@1 DESC], preserve_partitioning=[true] - ProjectionExec: expr=[ps_partkey@1 as ps_partkey, sum(partsupp.ps_supplycost * partsupp.ps_availqty)@2 as value] - NestedLoopJoinExec: join_type=Inner, filter=CAST(sum(partsupp.ps_supplycost * partsupp.ps_availqty)@0 AS Decimal128(38, 15)) > sum(partsupp.ps_supplycost * partsupp.ps_availqty) * Float64(0.0001)@1 - ProjectionExec: expr=[CAST(CAST(sum(partsupp.ps_supplycost * partsupp.ps_availqty)@0 AS Float64) * 0.0001 AS Decimal128(38, 15)) as sum(partsupp.ps_supplycost * partsupp.ps_availqty) * Float64(0.0001)] - AggregateExec: mode=Final, gby=[], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] - CoalescePartitionsExec - ShuffleReaderExec(stage_id=4, input_partitioning=Hash([], 4)) - AggregateExec: mode=FinalPartitioned, gby=[ps_partkey@0 as ps_partkey], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] +Query Stage #0 (1 -> 1): +SortExec: expr=[value@1 DESC], preserve_partitioning=[false] + ProjectionExec: expr=[ps_partkey@1 as ps_partkey, sum(partsupp.ps_supplycost * partsupp.ps_availqty)@2 as value] + NestedLoopJoinExec: join_type=Inner, filter=CAST(sum(partsupp.ps_supplycost * partsupp.ps_availqty)@0 AS Decimal128(38, 15)) > sum(partsupp.ps_supplycost * partsupp.ps_availqty) * Float64(0.0001)@1 + ProjectionExec: expr=[CAST(CAST(sum(partsupp.ps_supplycost * 
partsupp.ps_availqty)@0 AS Float64) * 0.0001 AS Decimal128(38, 15)) as sum(partsupp.ps_supplycost * partsupp.ps_availqty) * Float64(0.0001)] + AggregateExec: mode=Single, gby=[], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=9, input_partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 4)) - -Query Stage #11 (4 -> 1): -SortPreservingMergeExec: [value@1 DESC] - ShuffleReaderExec(stage_id=10, input_partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 4)) + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[ps_availqty@1, ps_supplycost@2] + ProjectionExec: expr=[n_nationkey@0 as n_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: n_name@1 = ALGERIA + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ALGERIA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ALGERIA AND ALGERIA <= n_name_max@1 END, required_guarantees=[n_name in (ALGERIA)] + ProjectionExec: expr=[ps_availqty@1 as ps_availqty, ps_supplycost@2 as ps_supplycost, s_nationkey@0 as s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@0)], projection=[s_nationkey@1, ps_availqty@3, ps_supplycost@4] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet:0..41091907]]}, projection=[ps_suppkey, ps_availqty, ps_supplycost] + AggregateExec: mode=Single, gby=[ps_partkey@0 as ps_partkey], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: 
mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@3)], projection=[ps_partkey@1, ps_availqty@2, ps_supplycost@3] + ProjectionExec: expr=[n_nationkey@0 as n_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: n_name@1 = ALGERIA + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ALGERIA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ALGERIA AND ALGERIA <= n_name_max@1 END, required_guarantees=[n_name in (ALGERIA)] + ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_availqty@2 as ps_availqty, ps_supplycost@3 as ps_supplycost, s_nationkey@0 as s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@1)], projection=[s_nationkey@1, ps_partkey@2, ps_availqty@4, ps_supplycost@5] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet:0..41091907]]}, projection=[ps_partkey, ps_suppkey, ps_availqty, ps_supplycost] diff --git a/testdata/expected-plans/q12.txt b/testdata/expected-plans/q12.txt index 3b1a165..cf5a5bd 100644 --- a/testdata/expected-plans/q12.txt +++ b/testdata/expected-plans/q12.txt @@ -14,60 +14,31 @@ Sort: lineitem.l_shipmode ASC NULLS LAST DataFusion Physical Plan ======================== -SortPreservingMergeExec: [l_shipmode@0 ASC NULLS LAST] - SortExec: expr=[l_shipmode@0 ASC NULLS LAST], preserve_partitioning=[true] - ProjectionExec: expr=[l_shipmode@0 as l_shipmode, sum(CASE WHEN orders.o_orderpriority = Utf8("1-URGENT") OR orders.o_orderpriority = Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END)@1 as high_line_count, sum(CASE WHEN orders.o_orderpriority != Utf8("1-URGENT") 
AND orders.o_orderpriority != Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END)@2 as low_line_count] - AggregateExec: mode=FinalPartitioned, gby=[l_shipmode@0 as l_shipmode], aggr=[sum(CASE WHEN orders.o_orderpriority = Utf8("1-URGENT") OR orders.o_orderpriority = Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END), sum(CASE WHEN orders.o_orderpriority != Utf8("1-URGENT") AND orders.o_orderpriority != Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END)] +SortExec: expr=[l_shipmode@0 ASC NULLS LAST], preserve_partitioning=[false] + ProjectionExec: expr=[l_shipmode@0 as l_shipmode, sum(CASE WHEN orders.o_orderpriority = Utf8("1-URGENT") OR orders.o_orderpriority = Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END)@1 as high_line_count, sum(CASE WHEN orders.o_orderpriority != Utf8("1-URGENT") AND orders.o_orderpriority != Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END)@2 as low_line_count] + AggregateExec: mode=Single, gby=[l_shipmode@1 as l_shipmode], aggr=[sum(CASE WHEN orders.o_orderpriority = Utf8("1-URGENT") OR orders.o_orderpriority = Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END), sum(CASE WHEN orders.o_orderpriority != Utf8("1-URGENT") AND orders.o_orderpriority != Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END)] + ProjectionExec: expr=[o_orderpriority@1 as o_orderpriority, l_shipmode@0 as l_shipmode] CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([l_shipmode@0], 4), input_partitions=4 - AggregateExec: mode=Partial, gby=[l_shipmode@1 as l_shipmode], aggr=[sum(CASE WHEN orders.o_orderpriority = Utf8("1-URGENT") OR orders.o_orderpriority = Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END), sum(CASE WHEN orders.o_orderpriority != Utf8("1-URGENT") AND orders.o_orderpriority != Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END)] - ProjectionExec: expr=[o_orderpriority@1 as o_orderpriority, l_shipmode@0 as l_shipmode] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(l_orderkey@0, o_orderkey@0)], 
projection=[l_shipmode@1, o_orderpriority@3] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4 - ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_shipmode@4 as l_shipmode] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: (l_shipmode@4 = FOB OR l_shipmode@4 = SHIP) AND l_receiptdate@3 > l_commitdate@2 AND l_shipdate@1 < l_commitdate@2 AND l_receiptdate@3 >= 1995-01-01 AND l_receiptdate@3 < 1996-01-01 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_orderkey, l_shipdate, l_commitdate, l_receiptdate, l_shipmode], predicate=(l_shipmode@14 = FOB OR l_shipmode@14 = SHIP) AND l_receiptdate@12 > l_commitdate@11 AND l_shipdate@10 < l_commitdate@11 AND l_receiptdate@12 >= 1995-01-01 AND l_receiptdate@12 < 1996-01-01, pruning_predicate=(CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= FOB AND FOB <= l_shipmode_max@1 END OR CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= SHIP AND SHIP <= l_shipmode_max@1 END) AND CASE WHEN l_receiptdate_null_count@5 = l_receiptdate_row_count@6 THEN false ELSE l_receiptdate_max@4 >= 1995-01-01 END AND CASE WHEN l_receiptdate_null_count@5 = l_receiptdate_row_count@6 THEN false ELSE l_receiptdate_min@7 < 1996-01-01 END, required_guarantees=[l_shipmode in (SHIP, FOB)] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([o_orderkey@0], 4), 
input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13634779], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13634779..27269558], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27269558..27275584, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13628753], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13628753..27263530]]}, projection=[o_orderkey, o_orderpriority] + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(l_orderkey@0, o_orderkey@0)], projection=[l_shipmode@1, o_orderpriority@3] + ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_shipmode@4 as l_shipmode] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: (l_shipmode@4 = FOB OR l_shipmode@4 = SHIP) AND l_receiptdate@3 > l_commitdate@2 AND l_shipdate@1 < l_commitdate@2 AND l_receiptdate@3 >= 1995-01-01 AND l_receiptdate@3 < 1996-01-01 + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208317955]]}, projection=[l_orderkey, l_shipdate, l_commitdate, l_receiptdate, l_shipmode], predicate=(l_shipmode@14 = FOB OR l_shipmode@14 = SHIP) AND l_receiptdate@12 > l_commitdate@11 AND l_shipdate@10 < l_commitdate@11 AND l_receiptdate@12 >= 1995-01-01 AND l_receiptdate@12 < 1996-01-01, pruning_predicate=(CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= FOB AND FOB <= l_shipmode_max@1 END OR CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= SHIP AND SHIP <= l_shipmode_max@1 END) AND CASE WHEN l_receiptdate_null_count@5 = l_receiptdate_row_count@6 THEN false ELSE l_receiptdate_max@4 >= 1995-01-01 END AND CASE WHEN l_receiptdate_null_count@5 = l_receiptdate_row_count@6 THEN false ELSE l_receiptdate_min@7 < 1996-01-01 END, 
required_guarantees=[l_shipmode in (SHIP, FOB)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54530383]]}, projection=[o_orderkey, o_orderpriority] RaySQL Plan =========== -Query Stage #0 (4 -> 4): -ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) - ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_shipmode@4 as l_shipmode] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: (l_shipmode@4 = FOB OR l_shipmode@4 = SHIP) AND l_receiptdate@3 > l_commitdate@2 AND l_shipdate@1 < l_commitdate@2 AND l_receiptdate@3 >= 1995-01-01 AND l_receiptdate@3 < 1996-01-01 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_orderkey, l_shipdate, l_commitdate, l_receiptdate, l_shipmode], predicate=(l_shipmode@14 = FOB OR l_shipmode@14 = SHIP) AND l_receiptdate@12 > l_commitdate@11 AND l_shipdate@10 < l_commitdate@11 AND l_receiptdate@12 >= 1995-01-01 AND l_receiptdate@12 < 1996-01-01, pruning_predicate=(CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= FOB AND FOB <= l_shipmode_max@1 END OR CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= SHIP AND SHIP <= l_shipmode_max@1 END) AND CASE WHEN l_receiptdate_null_count@5 = l_receiptdate_row_count@6 THEN false ELSE l_receiptdate_max@4 >= 1995-01-01 END AND CASE WHEN l_receiptdate_null_count@5 = l_receiptdate_row_count@6 THEN 
false ELSE l_receiptdate_min@7 < 1996-01-01 END, required_guarantees=[l_shipmode in (SHIP, FOB)] - -Query Stage #1 (4 -> 4): -ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13634779], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13634779..27269558], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27269558..27275584, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13628753], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13628753..27263530]]}, projection=[o_orderkey, o_orderpriority] - -Query Stage #2 (4 -> 4): -ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "l_shipmode", index: 0 }], 4)) - AggregateExec: mode=Partial, gby=[l_shipmode@1 as l_shipmode], aggr=[sum(CASE WHEN orders.o_orderpriority = Utf8("1-URGENT") OR orders.o_orderpriority = Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END), sum(CASE WHEN orders.o_orderpriority != Utf8("1-URGENT") AND orders.o_orderpriority != Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END)] - ProjectionExec: expr=[o_orderpriority@1 as o_orderpriority, l_shipmode@0 as l_shipmode] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(l_orderkey@0, o_orderkey@0)], projection=[l_shipmode@1, o_orderpriority@3] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) - -Query Stage #3 (4 -> 4): -ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "l_shipmode", index: 0 }], 4)) - SortExec: 
expr=[l_shipmode@0 ASC NULLS LAST], preserve_partitioning=[true] - ProjectionExec: expr=[l_shipmode@0 as l_shipmode, sum(CASE WHEN orders.o_orderpriority = Utf8("1-URGENT") OR orders.o_orderpriority = Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END)@1 as high_line_count, sum(CASE WHEN orders.o_orderpriority != Utf8("1-URGENT") AND orders.o_orderpriority != Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END)@2 as low_line_count] - AggregateExec: mode=FinalPartitioned, gby=[l_shipmode@0 as l_shipmode], aggr=[sum(CASE WHEN orders.o_orderpriority = Utf8("1-URGENT") OR orders.o_orderpriority = Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END), sum(CASE WHEN orders.o_orderpriority != Utf8("1-URGENT") AND orders.o_orderpriority != Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END)] +Query Stage #0 (1 -> 1): +SortExec: expr=[l_shipmode@0 ASC NULLS LAST], preserve_partitioning=[false] + ProjectionExec: expr=[l_shipmode@0 as l_shipmode, sum(CASE WHEN orders.o_orderpriority = Utf8("1-URGENT") OR orders.o_orderpriority = Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END)@1 as high_line_count, sum(CASE WHEN orders.o_orderpriority != Utf8("1-URGENT") AND orders.o_orderpriority != Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END)@2 as low_line_count] + AggregateExec: mode=Single, gby=[l_shipmode@1 as l_shipmode], aggr=[sum(CASE WHEN orders.o_orderpriority = Utf8("1-URGENT") OR orders.o_orderpriority = Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END), sum(CASE WHEN orders.o_orderpriority != Utf8("1-URGENT") AND orders.o_orderpriority != Utf8("2-HIGH") THEN Int64(1) ELSE Int64(0) END)] + ProjectionExec: expr=[o_orderpriority@1 as o_orderpriority, l_shipmode@0 as l_shipmode] CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "l_shipmode", index: 0 }], 4)) - -Query Stage #4 (4 -> 1): -SortPreservingMergeExec: [l_shipmode@0 ASC NULLS LAST] - ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "l_shipmode", index: 0 }], 
4)) + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(l_orderkey@0, o_orderkey@0)], projection=[l_shipmode@1, o_orderpriority@3] + ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_shipmode@4 as l_shipmode] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: (l_shipmode@4 = FOB OR l_shipmode@4 = SHIP) AND l_receiptdate@3 > l_commitdate@2 AND l_shipdate@1 < l_commitdate@2 AND l_receiptdate@3 >= 1995-01-01 AND l_receiptdate@3 < 1996-01-01 + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208317955]]}, projection=[l_orderkey, l_shipdate, l_commitdate, l_receiptdate, l_shipmode], predicate=(l_shipmode@14 = FOB OR l_shipmode@14 = SHIP) AND l_receiptdate@12 > l_commitdate@11 AND l_shipdate@10 < l_commitdate@11 AND l_receiptdate@12 >= 1995-01-01 AND l_receiptdate@12 < 1996-01-01, pruning_predicate=(CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= FOB AND FOB <= l_shipmode_max@1 END OR CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= SHIP AND SHIP <= l_shipmode_max@1 END) AND CASE WHEN l_receiptdate_null_count@5 = l_receiptdate_row_count@6 THEN false ELSE l_receiptdate_max@4 >= 1995-01-01 END AND CASE WHEN l_receiptdate_null_count@5 = l_receiptdate_row_count@6 THEN false ELSE l_receiptdate_min@7 < 1996-01-01 END, required_guarantees=[l_shipmode in (SHIP, FOB)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54530383]]}, projection=[o_orderkey, o_orderpriority] diff --git a/testdata/expected-plans/q13.txt b/testdata/expected-plans/q13.txt index cee4109..99712a4 100644 --- a/testdata/expected-plans/q13.txt +++ b/testdata/expected-plans/q13.txt @@ -17,62 +17,33 @@ Sort: custdist DESC NULLS FIRST, c_orders.c_count DESC NULLS FIRST DataFusion Physical Plan ======================== -SortPreservingMergeExec: [custdist@1 DESC,c_count@0 DESC] - SortExec: 
expr=[custdist@1 DESC,c_count@0 DESC], preserve_partitioning=[true] - ProjectionExec: expr=[c_count@0 as c_count, count(*)@1 as custdist] - AggregateExec: mode=FinalPartitioned, gby=[c_count@0 as c_count], aggr=[count(*)] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([c_count@0], 4), input_partitions=4 - AggregateExec: mode=Partial, gby=[c_count@0 as c_count], aggr=[count(*)] - ProjectionExec: expr=[count(orders.o_orderkey)@1 as c_count] - AggregateExec: mode=SinglePartitioned, gby=[c_custkey@0 as c_custkey], aggr=[count(orders.o_orderkey)] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Left, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, o_orderkey@1] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([c_custkey@0], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..3390424], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:3390424..6780848], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6780848..6782476, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..3388796], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:3388796..6779220]]}, projection=[c_custkey] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([o_custkey@1], 4), input_partitions=4 - ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: o_comment@2 NOT LIKE %express%requests% - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13634779], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13634779..27269558], 
[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27269558..27275584, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13628753], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13628753..27263530]]}, projection=[o_orderkey, o_custkey, o_comment], predicate=o_comment@8 NOT LIKE %express%requests% +SortExec: expr=[custdist@1 DESC,c_count@0 DESC], preserve_partitioning=[false] + ProjectionExec: expr=[c_count@0 as c_count, count(*)@1 as custdist] + AggregateExec: mode=Single, gby=[c_count@0 as c_count], aggr=[count(*)] + ProjectionExec: expr=[count(orders.o_orderkey)@1 as c_count] + AggregateExec: mode=Single, gby=[c_custkey@0 as c_custkey], aggr=[count(orders.o_orderkey)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Left, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, o_orderkey@1] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13211178]]}, projection=[c_custkey] + ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: o_comment@2 NOT LIKE %express%requests% + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54530383]]}, projection=[o_orderkey, o_custkey, o_comment], predicate=o_comment@8 NOT LIKE %express%requests% RaySQL Plan =========== -Query Stage #0 (4 -> 4): -ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..3390424], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:3390424..6780848], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6780848..6782476, 
home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..3388796], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:3388796..6779220]]}, projection=[c_custkey] - -Query Stage #1 (4 -> 4): -ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "o_custkey", index: 1 }], 4)) - ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: o_comment@2 NOT LIKE %express%requests% - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13634779], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13634779..27269558], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27269558..27275584, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13628753], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13628753..27263530]]}, projection=[o_orderkey, o_custkey, o_comment], predicate=o_comment@8 NOT LIKE %express%requests% - -Query Stage #2 (4 -> 4): -ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "c_count", index: 0 }], 4)) - AggregateExec: mode=Partial, gby=[c_count@0 as c_count], aggr=[count(*)] - ProjectionExec: expr=[count(orders.o_orderkey)@1 as c_count] - AggregateExec: mode=SinglePartitioned, gby=[c_custkey@0 as c_custkey], aggr=[count(orders.o_orderkey)] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Left, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, o_orderkey@1] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "o_custkey", index: 1 }], 
4)) - -Query Stage #3 (4 -> 4): -ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "c_count", index: 0 }], 4)) - SortExec: expr=[custdist@1 DESC,c_count@0 DESC], preserve_partitioning=[true] - ProjectionExec: expr=[c_count@0 as c_count, count(*)@1 as custdist] - AggregateExec: mode=FinalPartitioned, gby=[c_count@0 as c_count], aggr=[count(*)] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "c_count", index: 0 }], 4)) - -Query Stage #4 (4 -> 1): -SortPreservingMergeExec: [custdist@1 DESC,c_count@0 DESC] - ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "c_count", index: 0 }], 4)) +Query Stage #0 (1 -> 1): +SortExec: expr=[custdist@1 DESC,c_count@0 DESC], preserve_partitioning=[false] + ProjectionExec: expr=[c_count@0 as c_count, count(*)@1 as custdist] + AggregateExec: mode=Single, gby=[c_count@0 as c_count], aggr=[count(*)] + ProjectionExec: expr=[count(orders.o_orderkey)@1 as c_count] + AggregateExec: mode=Single, gby=[c_custkey@0 as c_custkey], aggr=[count(orders.o_orderkey)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Left, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, o_orderkey@1] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13211178]]}, projection=[c_custkey] + ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: o_comment@2 NOT LIKE %express%requests% + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54530383]]}, projection=[o_orderkey, o_custkey, o_comment], predicate=o_comment@8 NOT LIKE %express%requests% diff --git a/testdata/expected-plans/q14.txt b/testdata/expected-plans/q14.txt index b390e35..a2c3d52 100644 --- a/testdata/expected-plans/q14.txt +++ 
b/testdata/expected-plans/q14.txt @@ -14,51 +14,28 @@ DataFusion Physical Plan ======================== ProjectionExec: expr=[100 * CAST(sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END)@0 AS Float64) / CAST(sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@1 AS Float64) as promo_revenue] - AggregateExec: mode=Final, gby=[], aggr=[sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] - CoalescePartitionsExec - AggregateExec: mode=Partial, gby=[], aggr=[sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] - ProjectionExec: expr=[l_extendedprice@1 * (Some(1),20,0 - l_discount@2) as __common_expr_2, p_type@0 as p_type] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, l_partkey@0)], projection=[p_type@1, l_extendedprice@3, l_discount@4] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([p_partkey@0], 4), input_partitions=4 - RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_type] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([l_partkey@0], 4), input_partitions=4 - ProjectionExec: expr=[l_partkey@0 as l_partkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_shipdate@3 >= 1995-02-01 AND l_shipdate@3 < 1995-03-01 - ParquetExec: file_groups={4 groups: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_partkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1995-02-01 AND l_shipdate@10 < 1995-03-01, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1995-02-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1995-03-01 END, required_guarantees=[] + AggregateExec: mode=Single, gby=[], aggr=[sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] + ProjectionExec: expr=[l_extendedprice@1 * (Some(1),20,0 - l_discount@2) as __common_expr_2, p_type@0 as p_type] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, l_partkey@0)], projection=[p_type@1, l_extendedprice@3, l_discount@4] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_type] + ProjectionExec: expr=[l_partkey@0 as l_partkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: l_shipdate@3 >= 1995-02-01 AND l_shipdate@3 < 1995-03-01 + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208317955]]}, projection=[l_partkey, l_extendedprice, l_discount, 
l_shipdate], predicate=l_shipdate@10 >= 1995-02-01 AND l_shipdate@10 < 1995-03-01, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1995-02-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1995-03-01 END, required_guarantees=[] RaySQL Plan =========== -Query Stage #0 (2 -> 4): -ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_type] - -Query Stage #1 (4 -> 4): -ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "l_partkey", index: 0 }], 4)) - ProjectionExec: expr=[l_partkey@0 as l_partkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_shipdate@3 >= 1995-02-01 AND l_shipdate@3 < 1995-03-01 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_partkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1995-02-01 AND l_shipdate@10 < 1995-03-01, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1995-02-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE 
l_shipdate_min@3 < 1995-03-01 END, required_guarantees=[] - -Query Stage #2 (4 -> 1): -ShuffleWriterExec(stage_id=2, output_partitioning=Hash([], 4)) - AggregateExec: mode=Partial, gby=[], aggr=[sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] +Query Stage #0 (1 -> 1): +ProjectionExec: expr=[100 * CAST(sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END)@0 AS Float64) / CAST(sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@1 AS Float64) as promo_revenue] + AggregateExec: mode=Single, gby=[], aggr=[sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END), sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] ProjectionExec: expr=[l_extendedprice@1 * (Some(1),20,0 - l_discount@2) as __common_expr_2, p_type@0 as p_type] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, l_partkey@0)], projection=[p_type@1, l_extendedprice@3, l_discount@4] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "l_partkey", index: 0 }], 4)) - -Query Stage #3 (1 -> 1): -ProjectionExec: expr=[100 * CAST(sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END)@0 AS Float64) / CAST(sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@1 AS Float64) as promo_revenue] - AggregateExec: mode=Final, gby=[], aggr=[sum(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END), 
sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] - CoalescePartitionsExec - ShuffleReaderExec(stage_id=2, input_partitioning=Hash([], 4)) + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, l_partkey@0)], projection=[p_type@1, l_extendedprice@3, l_discount@4] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_type] + ProjectionExec: expr=[l_partkey@0 as l_partkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: l_shipdate@3 >= 1995-02-01 AND l_shipdate@3 < 1995-03-01 + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208317955]]}, projection=[l_partkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1995-02-01 AND l_shipdate@10 < 1995-03-01, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1995-02-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1995-03-01 END, required_guarantees=[] diff --git a/testdata/expected-plans/q16.txt b/testdata/expected-plans/q16.txt index 72e90d3..fdd9b0b 100644 --- a/testdata/expected-plans/q16.txt +++ b/testdata/expected-plans/q16.txt @@ -19,97 +19,43 @@ Sort: supplier_cnt DESC NULLS FIRST, part.p_brand ASC NULLS LAST, part.p_type AS DataFusion Physical Plan ======================== -SortPreservingMergeExec: [supplier_cnt@3 DESC,p_brand@0 ASC NULLS LAST,p_type@1 ASC NULLS LAST,p_size@2 ASC NULLS LAST] - SortExec: expr=[supplier_cnt@3 DESC,p_brand@0 ASC NULLS LAST,p_type@1 ASC NULLS LAST,p_size@2 ASC NULLS LAST], preserve_partitioning=[true] - ProjectionExec: expr=[p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size, count(alias1)@3 as supplier_cnt] - AggregateExec: mode=FinalPartitioned, gby=[p_brand@0 as p_brand, p_type@1 as p_type, 
p_size@2 as p_size], aggr=[count(alias1)] +SortExec: expr=[supplier_cnt@3 DESC,p_brand@0 ASC NULLS LAST,p_type@1 ASC NULLS LAST,p_size@2 ASC NULLS LAST], preserve_partitioning=[false] + ProjectionExec: expr=[p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size, count(alias1)@3 as supplier_cnt] + AggregateExec: mode=Single, gby=[p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size], aggr=[count(alias1)] + AggregateExec: mode=Single, gby=[p_brand@1 as p_brand, p_type@2 as p_type, p_size@3 as p_size, ps_suppkey@0 as alias1], aggr=[] CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([p_brand@0, p_type@1, p_size@2], 4), input_partitions=4 - AggregateExec: mode=Partial, gby=[p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size], aggr=[count(alias1)] - AggregateExec: mode=FinalPartitioned, gby=[p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size, alias1@3 as alias1], aggr=[] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([p_brand@0, p_type@1, p_size@2, alias1@3], 4), input_partitions=4 - AggregateExec: mode=Partial, gby=[p_brand@1 as p_brand, p_type@2 as p_type, p_size@3 as p_size, ps_suppkey@0 as alias1], aggr=[] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=RightAnti, on=[(s_suppkey@0, ps_suppkey@0)] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=4 - ProjectionExec: expr=[s_suppkey@0 as s_suppkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: s_comment@1 LIKE %Customer%Complaints% - RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_comment], predicate=s_comment@6 LIKE %Customer%Complaints% - 
CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([ps_suppkey@0], 4), input_partitions=4 - ProjectionExec: expr=[ps_suppkey@3 as ps_suppkey, p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, ps_partkey@0)], projection=[p_brand@1, p_type@2, p_size@3, ps_suppkey@5] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([p_partkey@0], 4), input_partitions=4 - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: p_brand@1 != Brand#14 AND p_type@2 NOT LIKE SMALL PLATED% AND Use p_size@3 IN (SET) ([Literal { value: Int32(14) }, Literal { value: Int32(6) }, Literal { value: Int32(5) }, Literal { value: Int32(31) }, Literal { value: Int32(49) }, Literal { value: Int32(15) }, Literal { value: Int32(41) }, Literal { value: Int32(47) }]) - RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_brand, p_type, p_size], predicate=p_brand@3 != Brand#14 AND p_type@4 NOT LIKE SMALL PLATED% AND Use p_size@5 IN (SET) ([Literal { value: Int32(14) }, Literal { value: Int32(6) }, Literal { value: Int32(5) }, Literal { value: Int32(31) }, Literal { value: Int32(49) }, Literal { value: Int32(15) }, Literal { value: Int32(41) }, Literal { value: Int32(47) }]), pruning_predicate=CASE WHEN p_brand_null_count@2 = p_brand_row_count@3 THEN false ELSE p_brand_min@0 != Brand#14 OR Brand#14 != p_brand_max@1 END AND (CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 14 AND 14 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 6 AND 6 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 
= p_size_row_count@7 THEN false ELSE p_size_min@4 <= 5 AND 5 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 31 AND 31 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 49 AND 49 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 15 AND 15 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 41 AND 41 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 47 AND 47 <= p_size_max@5 END), required_guarantees=[p_brand not in (Brand#14), p_size in (5, 31, 49, 47, 15, 14, 41, 6)] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([ps_partkey@0], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10445614], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:10445614..20890506, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..722], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:722..10446336], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:10446336..20891947]]}, projection=[ps_partkey, ps_suppkey] + HashJoinExec: mode=CollectLeft, join_type=RightAnti, on=[(s_suppkey@0, ps_suppkey@0)] + ProjectionExec: expr=[s_suppkey@0 as s_suppkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: s_comment@1 LIKE %Customer%Complaints% + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_comment], predicate=s_comment@6 LIKE %Customer%Complaints% + ProjectionExec: expr=[ps_suppkey@3 as ps_suppkey, p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size] + 
CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, ps_partkey@0)], projection=[p_brand@1, p_type@2, p_size@3, ps_suppkey@5] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: p_brand@1 != Brand#14 AND p_type@2 NOT LIKE SMALL PLATED% AND Use p_size@3 IN (SET) ([Literal { value: Int32(14) }, Literal { value: Int32(6) }, Literal { value: Int32(5) }, Literal { value: Int32(31) }, Literal { value: Int32(49) }, Literal { value: Int32(15) }, Literal { value: Int32(41) }, Literal { value: Int32(47) }]) + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_brand, p_type, p_size], predicate=p_brand@3 != Brand#14 AND p_type@4 NOT LIKE SMALL PLATED% AND Use p_size@5 IN (SET) ([Literal { value: Int32(14) }, Literal { value: Int32(6) }, Literal { value: Int32(5) }, Literal { value: Int32(31) }, Literal { value: Int32(49) }, Literal { value: Int32(15) }, Literal { value: Int32(41) }, Literal { value: Int32(47) }]), pruning_predicate=CASE WHEN p_brand_null_count@2 = p_brand_row_count@3 THEN false ELSE p_brand_min@0 != Brand#14 OR Brand#14 != p_brand_max@1 END AND (CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 14 AND 14 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 6 AND 6 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 5 AND 5 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 31 AND 31 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 49 AND 49 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 15 AND 15 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 41 AND 41 
<= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 47 AND 47 <= p_size_max@5 END), required_guarantees=[p_brand not in (Brand#14), p_size in (14, 6, 31, 49, 15, 47, 41, 5)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet:0..41091907]]}, projection=[ps_partkey, ps_suppkey] RaySQL Plan =========== -Query Stage #0 (2 -> 4): -ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) - ProjectionExec: expr=[s_suppkey@0 as s_suppkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: s_comment@1 LIKE %Customer%Complaints% - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_comment], predicate=s_comment@6 LIKE %Customer%Complaints% - -Query Stage #1 (2 -> 4): -ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: p_brand@1 != Brand#14 AND p_type@2 NOT LIKE SMALL PLATED% AND Use p_size@3 IN (SET) ([Literal { value: Int32(14) }, Literal { value: Int32(6) }, Literal { value: Int32(5) }, Literal { value: Int32(31) }, Literal { value: Int32(49) }, Literal { value: Int32(15) }, Literal { value: Int32(41) }, Literal { value: Int32(47) }]) - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_brand, p_type, p_size], predicate=p_brand@3 != Brand#14 AND p_type@4 NOT LIKE SMALL PLATED% AND Use p_size@5 IN (SET) ([Literal { value: Int32(14) }, Literal { value: Int32(6) }, Literal { value: Int32(5) }, Literal { value: Int32(31) }, Literal { value: Int32(49) }, 
Literal { value: Int32(15) }, Literal { value: Int32(41) }, Literal { value: Int32(47) }]), pruning_predicate=CASE WHEN p_brand_null_count@2 = p_brand_row_count@3 THEN false ELSE p_brand_min@0 != Brand#14 OR Brand#14 != p_brand_max@1 END AND (CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 14 AND 14 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 6 AND 6 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 5 AND 5 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 31 AND 31 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 49 AND 49 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 15 AND 15 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 41 AND 41 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 47 AND 47 <= p_size_max@5 END), required_guarantees=[p_brand not in (Brand#14), p_size in (5, 31, 49, 47, 15, 14, 41, 6)] - -Query Stage #2 (4 -> 4): -ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10445614], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:10445614..20890506, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..722], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:722..10446336], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:10446336..20891947]]}, projection=[ps_partkey, ps_suppkey] - -Query Stage #3 (4 -> 4): 
-ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "ps_suppkey", index: 0 }], 4)) - ProjectionExec: expr=[ps_suppkey@3 as ps_suppkey, p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, ps_partkey@0)], projection=[p_brand@1, p_type@2, p_size@3, ps_suppkey@5] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 4)) - -Query Stage #4 (4 -> 4): -ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "p_brand", index: 0 }, Column { name: "p_type", index: 1 }, Column { name: "p_size", index: 2 }, Column { name: "alias1", index: 3 }], 4)) - AggregateExec: mode=Partial, gby=[p_brand@1 as p_brand, p_type@2 as p_type, p_size@3 as p_size, ps_suppkey@0 as alias1], aggr=[] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=RightAnti, on=[(s_suppkey@0, ps_suppkey@0)] +Query Stage #0 (1 -> 1): +SortExec: expr=[supplier_cnt@3 DESC,p_brand@0 ASC NULLS LAST,p_type@1 ASC NULLS LAST,p_size@2 ASC NULLS LAST], preserve_partitioning=[false] + ProjectionExec: expr=[p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size, count(alias1)@3 as supplier_cnt] + AggregateExec: mode=Single, gby=[p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size], aggr=[count(alias1)] + AggregateExec: mode=Single, gby=[p_brand@1 as p_brand, p_type@2 as p_type, p_size@3 as p_size, ps_suppkey@0 as alias1], aggr=[] CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { 
name: "ps_suppkey", index: 0 }], 4)) - -Query Stage #5 (4 -> 4): -ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "p_brand", index: 0 }, Column { name: "p_type", index: 1 }, Column { name: "p_size", index: 2 }], 4)) - AggregateExec: mode=Partial, gby=[p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size], aggr=[count(alias1)] - AggregateExec: mode=FinalPartitioned, gby=[p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size, alias1@3 as alias1], aggr=[] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=4, input_partitioning=Hash([Column { name: "p_brand", index: 0 }, Column { name: "p_type", index: 1 }, Column { name: "p_size", index: 2 }, Column { name: "alias1", index: 3 }], 4)) - -Query Stage #6 (4 -> 4): -ShuffleWriterExec(stage_id=6, output_partitioning=Hash([Column { name: "p_brand", index: 0 }, Column { name: "p_type", index: 1 }, Column { name: "p_size", index: 2 }], 4)) - SortExec: expr=[supplier_cnt@3 DESC,p_brand@0 ASC NULLS LAST,p_type@1 ASC NULLS LAST,p_size@2 ASC NULLS LAST], preserve_partitioning=[true] - ProjectionExec: expr=[p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size, count(alias1)@3 as supplier_cnt] - AggregateExec: mode=FinalPartitioned, gby=[p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size], aggr=[count(alias1)] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=5, input_partitioning=Hash([Column { name: "p_brand", index: 0 }, Column { name: "p_type", index: 1 }, Column { name: "p_size", index: 2 }], 4)) - -Query Stage #7 (4 -> 1): -SortPreservingMergeExec: [supplier_cnt@3 DESC,p_brand@0 ASC NULLS LAST,p_type@1 ASC NULLS LAST,p_size@2 ASC NULLS LAST] - ShuffleReaderExec(stage_id=6, input_partitioning=Hash([Column { name: "p_brand", index: 0 }, Column { name: "p_type", index: 1 }, Column { name: "p_size", index: 2 }], 4)) + HashJoinExec: mode=CollectLeft, join_type=RightAnti, on=[(s_suppkey@0, ps_suppkey@0)] + ProjectionExec: 
expr=[s_suppkey@0 as s_suppkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: s_comment@1 LIKE %Customer%Complaints% + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_comment], predicate=s_comment@6 LIKE %Customer%Complaints% + ProjectionExec: expr=[ps_suppkey@3 as ps_suppkey, p_brand@0 as p_brand, p_type@1 as p_type, p_size@2 as p_size] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, ps_partkey@0)], projection=[p_brand@1, p_type@2, p_size@3, ps_suppkey@5] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: p_brand@1 != Brand#14 AND p_type@2 NOT LIKE SMALL PLATED% AND Use p_size@3 IN (SET) ([Literal { value: Int32(14) }, Literal { value: Int32(6) }, Literal { value: Int32(5) }, Literal { value: Int32(31) }, Literal { value: Int32(49) }, Literal { value: Int32(15) }, Literal { value: Int32(41) }, Literal { value: Int32(47) }]) + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_brand, p_type, p_size], predicate=p_brand@3 != Brand#14 AND p_type@4 NOT LIKE SMALL PLATED% AND Use p_size@5 IN (SET) ([Literal { value: Int32(14) }, Literal { value: Int32(6) }, Literal { value: Int32(5) }, Literal { value: Int32(31) }, Literal { value: Int32(49) }, Literal { value: Int32(15) }, Literal { value: Int32(41) }, Literal { value: Int32(47) }]), pruning_predicate=CASE WHEN p_brand_null_count@2 = p_brand_row_count@3 THEN false ELSE p_brand_min@0 != Brand#14 OR Brand#14 != p_brand_max@1 END AND (CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 14 AND 14 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 6 AND 6 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 5 AND 5 <= p_size_max@5 END 
OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 31 AND 31 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 49 AND 49 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 15 AND 15 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 41 AND 41 <= p_size_max@5 END OR CASE WHEN p_size_null_count@6 = p_size_row_count@7 THEN false ELSE p_size_min@4 <= 47 AND 47 <= p_size_max@5 END), required_guarantees=[p_brand not in (Brand#14), p_size in (14, 6, 31, 49, 15, 47, 41, 5)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet:0..41091907]]}, projection=[ps_partkey, ps_suppkey] diff --git a/testdata/expected-plans/q17.txt b/testdata/expected-plans/q17.txt index b159323..92f4079 100644 --- a/testdata/expected-plans/q17.txt +++ b/testdata/expected-plans/q17.txt @@ -20,70 +20,38 @@ DataFusion Physical Plan ======================== ProjectionExec: expr=[CAST(sum(lineitem.l_extendedprice)@0 AS Float64) / 7 as avg_yearly] - AggregateExec: mode=Final, gby=[], aggr=[sum(lineitem.l_extendedprice)] - CoalescePartitionsExec - AggregateExec: mode=Partial, gby=[], aggr=[sum(lineitem.l_extendedprice)] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@2, l_partkey@1)], filter=CAST(l_quantity@0 AS Decimal128(30, 15)) < Float64(0.2) * avg(lineitem.l_quantity)@1, projection=[l_extendedprice@1] - ProjectionExec: expr=[l_quantity@1 as l_quantity, l_extendedprice@2 as l_extendedprice, p_partkey@0 as p_partkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, l_partkey@0)], projection=[p_partkey@0, l_quantity@2, l_extendedprice@3] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: 
partitioning=Hash([p_partkey@0], 4), input_partitions=4 - ProjectionExec: expr=[p_partkey@0 as p_partkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: p_brand@1 = Brand#42 AND p_container@2 = LG BAG - RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_brand, p_container], predicate=p_brand@3 = Brand#42 AND p_container@6 = LG BAG, pruning_predicate=CASE WHEN p_brand_null_count@2 = p_brand_row_count@3 THEN false ELSE p_brand_min@0 <= Brand#42 AND Brand#42 <= p_brand_max@1 END AND CASE WHEN p_container_null_count@6 = p_container_row_count@7 THEN false ELSE p_container_min@4 <= LG BAG AND LG BAG <= p_container_max@5 END, required_guarantees=[p_brand in (Brand#42), p_container in (LG BAG)] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([l_partkey@0], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_partkey, l_quantity, l_extendedprice] - ProjectionExec: expr=[CAST(0.2 * CAST(avg(lineitem.l_quantity)@1 AS Float64) AS Decimal128(30, 15)) as Float64(0.2) * avg(lineitem.l_quantity), l_partkey@0 as l_partkey] - AggregateExec: mode=FinalPartitioned, gby=[l_partkey@0 as l_partkey], aggr=[avg(lineitem.l_quantity)] + AggregateExec: mode=Single, gby=[], 
aggr=[sum(lineitem.l_extendedprice)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@2, l_partkey@1)], filter=CAST(l_quantity@0 AS Decimal128(30, 15)) < Float64(0.2) * avg(lineitem.l_quantity)@1, projection=[l_extendedprice@1] + ProjectionExec: expr=[l_quantity@1 as l_quantity, l_extendedprice@2 as l_extendedprice, p_partkey@0 as p_partkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, l_partkey@0)], projection=[p_partkey@0, l_quantity@2, l_extendedprice@3] + ProjectionExec: expr=[p_partkey@0 as p_partkey] CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([l_partkey@0], 4), input_partitions=4 - AggregateExec: mode=Partial, gby=[l_partkey@0 as l_partkey], aggr=[avg(lineitem.l_quantity)] - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_partkey, l_quantity] + FilterExec: p_brand@1 = Brand#42 AND p_container@2 = LG BAG + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_brand, p_container], predicate=p_brand@3 = Brand#42 AND p_container@6 = LG BAG, pruning_predicate=CASE WHEN p_brand_null_count@2 = p_brand_row_count@3 THEN false ELSE p_brand_min@0 <= Brand#42 AND Brand#42 <= p_brand_max@1 END AND CASE WHEN p_container_null_count@6 = p_container_row_count@7 THEN false ELSE p_container_min@4 <= LG BAG AND LG BAG <= 
p_container_max@5 END, required_guarantees=[p_brand in (Brand#42), p_container in (LG BAG)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208317955]]}, projection=[l_partkey, l_quantity, l_extendedprice] + ProjectionExec: expr=[CAST(0.2 * CAST(avg(lineitem.l_quantity)@1 AS Float64) AS Decimal128(30, 15)) as Float64(0.2) * avg(lineitem.l_quantity), l_partkey@0 as l_partkey] + AggregateExec: mode=Single, gby=[l_partkey@0 as l_partkey], aggr=[avg(lineitem.l_quantity)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208317955]]}, projection=[l_partkey, l_quantity] RaySQL Plan =========== -Query Stage #0 (2 -> 4): -ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) - ProjectionExec: expr=[p_partkey@0 as p_partkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: p_brand@1 = Brand#42 AND p_container@2 = LG BAG - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_brand, p_container], predicate=p_brand@3 = Brand#42 AND p_container@6 = LG BAG, pruning_predicate=CASE WHEN p_brand_null_count@2 = p_brand_row_count@3 THEN false ELSE p_brand_min@0 <= Brand#42 AND Brand#42 <= p_brand_max@1 END AND CASE WHEN p_container_null_count@6 = p_container_row_count@7 THEN false ELSE p_container_min@4 <= LG BAG AND LG BAG <= p_container_max@5 END, required_guarantees=[p_brand in (Brand#42), p_container in (LG BAG)] - -Query Stage #1 (4 -> 4): -ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "l_partkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], 
[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_partkey, l_quantity, l_extendedprice] - -Query Stage #2 (4 -> 4): -ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "l_partkey", index: 0 }], 4)) - AggregateExec: mode=Partial, gby=[l_partkey@0 as l_partkey], aggr=[avg(lineitem.l_quantity)] - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_partkey, l_quantity] - -Query Stage #3 (4 -> 1): -ShuffleWriterExec(stage_id=3, output_partitioning=Hash([], 4)) - AggregateExec: mode=Partial, gby=[], aggr=[sum(lineitem.l_extendedprice)] +Query Stage #0 (1 -> 1): +ProjectionExec: expr=[CAST(sum(lineitem.l_extendedprice)@0 AS Float64) / 7 as avg_yearly] + AggregateExec: mode=Single, gby=[], aggr=[sum(lineitem.l_extendedprice)] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@2, l_partkey@1)], filter=CAST(l_quantity@0 AS Decimal128(30, 15)) < Float64(0.2) * avg(lineitem.l_quantity)@1, projection=[l_extendedprice@1] + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@2, l_partkey@1)], filter=CAST(l_quantity@0 AS 
Decimal128(30, 15)) < Float64(0.2) * avg(lineitem.l_quantity)@1, projection=[l_extendedprice@1] ProjectionExec: expr=[l_quantity@1 as l_quantity, l_extendedprice@2 as l_extendedprice, p_partkey@0 as p_partkey] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, l_partkey@0)], projection=[p_partkey@0, l_quantity@2, l_extendedprice@3] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "l_partkey", index: 0 }], 4)) + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, l_partkey@0)], projection=[p_partkey@0, l_quantity@2, l_extendedprice@3] + ProjectionExec: expr=[p_partkey@0 as p_partkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: p_brand@1 = Brand#42 AND p_container@2 = LG BAG + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_brand, p_container], predicate=p_brand@3 = Brand#42 AND p_container@6 = LG BAG, pruning_predicate=CASE WHEN p_brand_null_count@2 = p_brand_row_count@3 THEN false ELSE p_brand_min@0 <= Brand#42 AND Brand#42 <= p_brand_max@1 END AND CASE WHEN p_container_null_count@6 = p_container_row_count@7 THEN false ELSE p_container_min@4 <= LG BAG AND LG BAG <= p_container_max@5 END, required_guarantees=[p_brand in (Brand#42), p_container in (LG BAG)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208317955]]}, projection=[l_partkey, l_quantity, l_extendedprice] ProjectionExec: expr=[CAST(0.2 * CAST(avg(lineitem.l_quantity)@1 AS Float64) AS Decimal128(30, 15)) as Float64(0.2) * avg(lineitem.l_quantity), l_partkey@0 as l_partkey] - AggregateExec: mode=FinalPartitioned, gby=[l_partkey@0 as l_partkey], 
aggr=[avg(lineitem.l_quantity)] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "l_partkey", index: 0 }], 4)) - -Query Stage #4 (1 -> 1): -ProjectionExec: expr=[CAST(sum(lineitem.l_extendedprice)@0 AS Float64) / 7 as avg_yearly] - AggregateExec: mode=Final, gby=[], aggr=[sum(lineitem.l_extendedprice)] - CoalescePartitionsExec - ShuffleReaderExec(stage_id=3, input_partitioning=Hash([], 4)) + AggregateExec: mode=Single, gby=[l_partkey@0 as l_partkey], aggr=[avg(lineitem.l_quantity)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208317955]]}, projection=[l_partkey, l_quantity] diff --git a/testdata/expected-plans/q18.txt b/testdata/expected-plans/q18.txt index a866530..57a45fa 100644 --- a/testdata/expected-plans/q18.txt +++ b/testdata/expected-plans/q18.txt @@ -21,95 +21,41 @@ Limit: skip=0, fetch=100 DataFusion Physical Plan ======================== -GlobalLimitExec: skip=0, fetch=100 - SortPreservingMergeExec: [o_totalprice@4 DESC,o_orderdate@3 ASC NULLS LAST], fetch=100 - SortExec: TopK(fetch=100), expr=[o_totalprice@4 DESC,o_orderdate@3 ASC NULLS LAST], preserve_partitioning=[true] - AggregateExec: mode=FinalPartitioned, gby=[c_name@0 as c_name, c_custkey@1 as c_custkey, o_orderkey@2 as o_orderkey, o_orderdate@3 as o_orderdate, o_totalprice@4 as o_totalprice], aggr=[sum(lineitem.l_quantity)] +SortExec: TopK(fetch=100), expr=[o_totalprice@4 DESC,o_orderdate@3 ASC NULLS LAST], preserve_partitioning=[false] + AggregateExec: mode=Single, gby=[c_name@1 as c_name, c_custkey@0 as c_custkey, o_orderkey@2 as o_orderkey, o_orderdate@4 as o_orderdate, o_totalprice@3 as o_totalprice], aggr=[sum(lineitem.l_quantity)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=RightSemi, on=[(l_orderkey@0, o_orderkey@2)] + ProjectionExec: expr=[l_orderkey@0 as l_orderkey] + CoalesceBatchesExec: 
target_batch_size=8192 + FilterExec: sum(lineitem.l_quantity)@1 > Some(31300),21,2 + AggregateExec: mode=Single, gby=[l_orderkey@0 as l_orderkey], aggr=[sum(lineitem.l_quantity)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208317955]]}, projection=[l_orderkey, l_quantity] CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([c_name@0, c_custkey@1, o_orderkey@2, o_orderdate@3, o_totalprice@4], 4), input_partitions=4 - AggregateExec: mode=Partial, gby=[c_name@1 as c_name, c_custkey@0 as c_custkey, o_orderkey@2 as o_orderkey, o_orderdate@4 as o_orderdate, o_totalprice@3 as o_totalprice], aggr=[sum(lineitem.l_quantity)] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=RightSemi, on=[(l_orderkey@0, o_orderkey@2)] - ProjectionExec: expr=[l_orderkey@0 as l_orderkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: sum(lineitem.l_quantity)@1 > Some(31300),21,2 - AggregateExec: mode=FinalPartitioned, gby=[l_orderkey@0 as l_orderkey], aggr=[sum(lineitem.l_quantity)] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4 - AggregateExec: mode=Partial, gby=[l_orderkey@0 as l_orderkey], aggr=[sum(lineitem.l_quantity)] - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_orderkey, l_quantity] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, 
join_type=Inner, on=[(o_orderkey@2, l_orderkey@0)], projection=[c_custkey@0, c_name@1, o_orderkey@2, o_totalprice@3, o_orderdate@4, l_quantity@6] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([o_orderkey@2], 4), input_partitions=4 - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, c_name@1, o_orderkey@2, o_totalprice@4, o_orderdate@5] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([c_custkey@0], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..3390424], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:3390424..6780848], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6780848..6782476, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..3388796], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:3388796..6779220]]}, projection=[c_custkey, c_name] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([o_custkey@1], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13634779], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13634779..27269558], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27269558..27275584, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13628753], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13628753..27263530]]}, projection=[o_orderkey, o_custkey, o_totalprice, o_orderdate] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([l_orderkey@0], 4), 
input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_orderkey, l_quantity] + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(o_orderkey@2, l_orderkey@0)], projection=[c_custkey@0, c_name@1, o_orderkey@2, o_totalprice@3, o_orderdate@4, l_quantity@6] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, c_name@1, o_orderkey@2, o_totalprice@4, o_orderdate@5] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13211178]]}, projection=[c_custkey, c_name] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54530383]]}, projection=[o_orderkey, o_custkey, o_totalprice, o_orderdate] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208317955]]}, projection=[l_orderkey, l_quantity] RaySQL Plan =========== -Query Stage #0 (4 -> 4): -ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) - AggregateExec: mode=Partial, gby=[l_orderkey@0 as l_orderkey], aggr=[sum(lineitem.l_quantity)] - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, 
home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_orderkey, l_quantity] - -Query Stage #1 (4 -> 4): -ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..3390424], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:3390424..6780848], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6780848..6782476, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..3388796], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:3388796..6779220]]}, projection=[c_custkey, c_name] - -Query Stage #2 (4 -> 4): -ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "o_custkey", index: 1 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13634779], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13634779..27269558], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27269558..27275584, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13628753], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13628753..27263530]]}, projection=[o_orderkey, o_custkey, o_totalprice, o_orderdate] - -Query Stage #3 (4 -> 4): -ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "o_orderkey", index: 2 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, 
join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, c_name@1, o_orderkey@2, o_totalprice@4, o_orderdate@5] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "o_custkey", index: 1 }], 4)) - -Query Stage #4 (4 -> 4): -ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_orderkey, l_quantity] - -Query Stage #5 (4 -> 4): -ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "c_name", index: 0 }, Column { name: "c_custkey", index: 1 }, Column { name: "o_orderkey", index: 2 }, Column { name: "o_orderdate", index: 3 }, Column { name: "o_totalprice", index: 4 }], 4)) - AggregateExec: mode=Partial, gby=[c_name@1 as c_name, c_custkey@0 as c_custkey, o_orderkey@2 as o_orderkey, o_orderdate@4 as o_orderdate, o_totalprice@3 as o_totalprice], aggr=[sum(lineitem.l_quantity)] +Query Stage #0 (1 -> 1): +SortExec: TopK(fetch=100), expr=[o_totalprice@4 DESC,o_orderdate@3 ASC NULLS LAST], preserve_partitioning=[false] + AggregateExec: mode=Single, gby=[c_name@1 as c_name, c_custkey@0 as c_custkey, o_orderkey@2 as o_orderkey, o_orderdate@4 as o_orderdate, o_totalprice@3 as o_totalprice], aggr=[sum(lineitem.l_quantity)] 
CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=RightSemi, on=[(l_orderkey@0, o_orderkey@2)] + HashJoinExec: mode=CollectLeft, join_type=RightSemi, on=[(l_orderkey@0, o_orderkey@2)] ProjectionExec: expr=[l_orderkey@0 as l_orderkey] CoalesceBatchesExec: target_batch_size=8192 FilterExec: sum(lineitem.l_quantity)@1 > Some(31300),21,2 - AggregateExec: mode=FinalPartitioned, gby=[l_orderkey@0 as l_orderkey], aggr=[sum(lineitem.l_quantity)] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) + AggregateExec: mode=Single, gby=[l_orderkey@0 as l_orderkey], aggr=[sum(lineitem.l_quantity)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208317955]]}, projection=[l_orderkey, l_quantity] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@2, l_orderkey@0)], projection=[c_custkey@0, c_name@1, o_orderkey@2, o_totalprice@3, o_orderdate@4, l_quantity@6] + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(o_orderkey@2, l_orderkey@0)], projection=[c_custkey@0, c_name@1, o_orderkey@2, o_totalprice@3, o_orderdate@4, l_quantity@6] CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "o_orderkey", index: 2 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=4, input_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) - -Query Stage #6 (4 -> 4): -ShuffleWriterExec(stage_id=6, output_partitioning=Hash([Column { name: "c_name", index: 0 }, Column { name: "c_custkey", index: 1 }, Column { name: "o_orderkey", index: 2 }, Column { name: "o_orderdate", index: 3 }, Column { name: "o_totalprice", index: 4 }], 4)) - SortExec: TopK(fetch=100), expr=[o_totalprice@4 DESC,o_orderdate@3 ASC NULLS LAST], preserve_partitioning=[true] - 
AggregateExec: mode=FinalPartitioned, gby=[c_name@0 as c_name, c_custkey@1 as c_custkey, o_orderkey@2 as o_orderkey, o_orderdate@3 as o_orderdate, o_totalprice@4 as o_totalprice], aggr=[sum(lineitem.l_quantity)] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=5, input_partitioning=Hash([Column { name: "c_name", index: 0 }, Column { name: "c_custkey", index: 1 }, Column { name: "o_orderkey", index: 2 }, Column { name: "o_orderdate", index: 3 }, Column { name: "o_totalprice", index: 4 }], 4)) - -Query Stage #7 (1 -> 1): -GlobalLimitExec: skip=0, fetch=100 - SortPreservingMergeExec: [o_totalprice@4 DESC,o_orderdate@3 ASC NULLS LAST], fetch=100 - ShuffleReaderExec(stage_id=6, input_partitioning=Hash([Column { name: "c_name", index: 0 }, Column { name: "c_custkey", index: 1 }, Column { name: "o_orderkey", index: 2 }, Column { name: "o_orderdate", index: 3 }, Column { name: "o_totalprice", index: 4 }], 4)) + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, c_name@1, o_orderkey@2, o_totalprice@4, o_orderdate@5] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13211178]]}, projection=[c_custkey, c_name] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54530383]]}, projection=[o_orderkey, o_custkey, o_totalprice, o_orderdate] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208317955]]}, projection=[l_orderkey, l_quantity] diff --git a/testdata/expected-plans/q19.txt b/testdata/expected-plans/q19.txt index b7f0b76..66c9ced 100644 --- a/testdata/expected-plans/q19.txt +++ b/testdata/expected-plans/q19.txt @@ -15,53 +15,30 @@ DataFusion Physical Plan ======================== ProjectionExec: expr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@0 as revenue] - AggregateExec: mode=Final, gby=[], 
aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] - CoalescePartitionsExec - AggregateExec: mode=Partial, gby=[], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] + AggregateExec: mode=Single, gby=[], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, l_partkey@0)], filter=p_brand@1 = Brand#21 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("SM CASE") }, Literal { value: Utf8("SM BOX") }, Literal { value: Utf8("SM PACK") }, Literal { value: Utf8("SM PKG") }]) AND l_quantity@0 >= Some(800),11,2 AND l_quantity@0 <= Some(1800),11,2 AND p_size@2 <= 5 OR p_brand@1 = Brand#13 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("MED BAG") }, Literal { value: Utf8("MED BOX") }, Literal { value: Utf8("MED PKG") }, Literal { value: Utf8("MED PACK") }]) AND l_quantity@0 >= Some(2000),11,2 AND l_quantity@0 <= Some(3000),11,2 AND p_size@2 <= 10 OR p_brand@1 = Brand#52 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("LG CASE") }, Literal { value: Utf8("LG BOX") }, Literal { value: Utf8("LG PACK") }, Literal { value: Utf8("LG PKG") }]) AND l_quantity@0 >= Some(3000),11,2 AND l_quantity@0 <= Some(4000),11,2 AND p_size@2 <= 15, projection=[l_extendedprice@6, l_discount@7] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, l_partkey@0)], filter=p_brand@1 = Brand#21 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("SM CASE") }, Literal { value: Utf8("SM BOX") }, Literal { value: Utf8("SM PACK") }, Literal { value: Utf8("SM PKG") }]) AND l_quantity@0 >= Some(800),11,2 AND l_quantity@0 <= Some(1800),11,2 AND p_size@2 <= 5 OR p_brand@1 = Brand#13 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("MED BAG") }, Literal { value: Utf8("MED BOX") }, Literal { value: Utf8("MED PKG") }, Literal { value: Utf8("MED PACK") }]) AND 
l_quantity@0 >= Some(2000),11,2 AND l_quantity@0 <= Some(3000),11,2 AND p_size@2 <= 10 OR p_brand@1 = Brand#52 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("LG CASE") }, Literal { value: Utf8("LG BOX") }, Literal { value: Utf8("LG PACK") }, Literal { value: Utf8("LG PKG") }]) AND l_quantity@0 >= Some(3000),11,2 AND l_quantity@0 <= Some(4000),11,2 AND p_size@2 <= 15, projection=[l_extendedprice@6, l_discount@7] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([p_partkey@0], 4), input_partitions=4 - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: (p_brand@1 = Brand#21 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("SM CASE") }, Literal { value: Utf8("SM BOX") }, Literal { value: Utf8("SM PACK") }, Literal { value: Utf8("SM PKG") }]) AND p_size@2 <= 5 OR p_brand@1 = Brand#13 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("MED BAG") }, Literal { value: Utf8("MED BOX") }, Literal { value: Utf8("MED PKG") }, Literal { value: Utf8("MED PACK") }]) AND p_size@2 <= 10 OR p_brand@1 = Brand#52 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("LG CASE") }, Literal { value: Utf8("LG BOX") }, Literal { value: Utf8("LG PACK") }, Literal { value: Utf8("LG PKG") }]) AND p_size@2 <= 15) AND p_size@2 >= 1 - RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_brand, p_size, p_container], predicate=p_size@5 >= 1 AND (p_brand@3 = Brand#21 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("SM CASE") }, Literal { value: Utf8("SM BOX") }, Literal { value: Utf8("SM PACK") }, Literal { value: Utf8("SM PKG") }]) AND p_size@5 <= 5 OR p_brand@3 = Brand#13 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("MED BAG") }, Literal { value: Utf8("MED BOX") }, Literal { 
value: Utf8("MED PKG") }, Literal { value: Utf8("MED PACK") }]) AND p_size@5 <= 10 OR p_brand@3 = Brand#52 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("LG CASE") }, Literal { value: Utf8("LG BOX") }, Literal { value: Utf8("LG PACK") }, Literal { value: Utf8("LG PKG") }]) AND p_size@5 <= 15), pruning_predicate=CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_max@0 >= 1 END AND (CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#21 AND Brand#21 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM CASE AND SM CASE <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM BOX AND SM BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM PACK AND SM PACK <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM PKG AND SM PKG <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 5 END OR CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#13 AND Brand#13 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED BAG AND MED BAG <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED BOX AND MED BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED PKG AND MED PKG <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED PACK AND MED PACK <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = 
p_size_row_count@2 THEN false ELSE p_size_min@11 <= 10 END OR CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#52 AND Brand#52 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG CASE AND LG CASE <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG BOX AND LG BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG PACK AND LG PACK <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG PKG AND LG PKG <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 15 END), required_guarantees=[] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([l_partkey@0], 4), input_partitions=4 - ProjectionExec: expr=[l_partkey@0 as l_partkey, l_quantity@1 as l_quantity, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: (l_quantity@1 >= Some(800),11,2 AND l_quantity@1 <= Some(1800),11,2 OR l_quantity@1 >= Some(2000),11,2 AND l_quantity@1 <= Some(3000),11,2 OR l_quantity@1 >= Some(3000),11,2 AND l_quantity@1 <= Some(4000),11,2) AND (l_shipmode@5 = AIR OR l_shipmode@5 = AIR REG) AND l_shipinstruct@4 = DELIVER IN PERSON - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], 
[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_partkey, l_quantity, l_extendedprice, l_discount, l_shipinstruct, l_shipmode], predicate=(l_shipmode@14 = AIR OR l_shipmode@14 = AIR REG) AND l_shipinstruct@13 = DELIVER IN PERSON AND (l_quantity@4 >= Some(800),11,2 AND l_quantity@4 <= Some(1800),11,2 OR l_quantity@4 >= Some(2000),11,2 AND l_quantity@4 <= Some(3000),11,2 OR l_quantity@4 >= Some(3000),11,2 AND l_quantity@4 <= Some(4000),11,2), pruning_predicate=(CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= AIR AND AIR <= l_shipmode_max@1 END OR CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= AIR REG AND AIR REG <= l_shipmode_max@1 END) AND CASE WHEN l_shipinstruct_null_count@6 = l_shipinstruct_row_count@7 THEN false ELSE l_shipinstruct_min@4 <= DELIVER IN PERSON AND DELIVER IN PERSON <= l_shipinstruct_max@5 END AND (CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(800),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(1800),11,2 END OR CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(2000),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(3000),11,2 END OR CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(3000),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(4000),11,2 END), required_guarantees=[l_shipmode in (AIR, AIR REG), l_shipinstruct in (DELIVER IN PERSON)] + FilterExec: (p_brand@1 = Brand#21 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("SM CASE") }, Literal { value: Utf8("SM BOX") }, Literal { value: Utf8("SM PACK") }, Literal { value: 
Utf8("SM PKG") }]) AND p_size@2 <= 5 OR p_brand@1 = Brand#13 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("MED BAG") }, Literal { value: Utf8("MED BOX") }, Literal { value: Utf8("MED PKG") }, Literal { value: Utf8("MED PACK") }]) AND p_size@2 <= 10 OR p_brand@1 = Brand#52 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("LG CASE") }, Literal { value: Utf8("LG BOX") }, Literal { value: Utf8("LG PACK") }, Literal { value: Utf8("LG PKG") }]) AND p_size@2 <= 15) AND p_size@2 >= 1 + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_brand, p_size, p_container], predicate=p_size@5 >= 1 AND (p_brand@3 = Brand#21 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("SM CASE") }, Literal { value: Utf8("SM BOX") }, Literal { value: Utf8("SM PACK") }, Literal { value: Utf8("SM PKG") }]) AND p_size@5 <= 5 OR p_brand@3 = Brand#13 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("MED BAG") }, Literal { value: Utf8("MED BOX") }, Literal { value: Utf8("MED PKG") }, Literal { value: Utf8("MED PACK") }]) AND p_size@5 <= 10 OR p_brand@3 = Brand#52 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("LG CASE") }, Literal { value: Utf8("LG BOX") }, Literal { value: Utf8("LG PACK") }, Literal { value: Utf8("LG PKG") }]) AND p_size@5 <= 15), pruning_predicate=CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_max@0 >= 1 END AND (CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#21 AND Brand#21 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM CASE AND SM CASE <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM BOX AND SM BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM PACK AND SM 
PACK <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM PKG AND SM PKG <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 5 END OR CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#13 AND Brand#13 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED BAG AND MED BAG <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED BOX AND MED BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED PKG AND MED PKG <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED PACK AND MED PACK <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 10 END OR CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#52 AND Brand#52 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG CASE AND LG CASE <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG BOX AND LG BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG PACK AND LG PACK <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG PKG AND LG PKG <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 15 END), required_guarantees=[] + ProjectionExec: expr=[l_partkey@0 as l_partkey, l_quantity@1 as 
l_quantity, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: (l_quantity@1 >= Some(800),11,2 AND l_quantity@1 <= Some(1800),11,2 OR l_quantity@1 >= Some(2000),11,2 AND l_quantity@1 <= Some(3000),11,2 OR l_quantity@1 >= Some(3000),11,2 AND l_quantity@1 <= Some(4000),11,2) AND (l_shipmode@5 = AIR OR l_shipmode@5 = AIR REG) AND l_shipinstruct@4 = DELIVER IN PERSON + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208317955]]}, projection=[l_partkey, l_quantity, l_extendedprice, l_discount, l_shipinstruct, l_shipmode], predicate=(l_shipmode@14 = AIR OR l_shipmode@14 = AIR REG) AND l_shipinstruct@13 = DELIVER IN PERSON AND (l_quantity@4 >= Some(800),11,2 AND l_quantity@4 <= Some(1800),11,2 OR l_quantity@4 >= Some(2000),11,2 AND l_quantity@4 <= Some(3000),11,2 OR l_quantity@4 >= Some(3000),11,2 AND l_quantity@4 <= Some(4000),11,2), pruning_predicate=(CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= AIR AND AIR <= l_shipmode_max@1 END OR CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= AIR REG AND AIR REG <= l_shipmode_max@1 END) AND CASE WHEN l_shipinstruct_null_count@6 = l_shipinstruct_row_count@7 THEN false ELSE l_shipinstruct_min@4 <= DELIVER IN PERSON AND DELIVER IN PERSON <= l_shipinstruct_max@5 END AND (CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(800),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(1800),11,2 END OR CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(2000),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(3000),11,2 END OR CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN 
false ELSE l_quantity_max@8 >= Some(3000),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(4000),11,2 END), required_guarantees=[l_shipmode in (AIR, AIR REG), l_shipinstruct in (DELIVER IN PERSON)] RaySQL Plan =========== -Query Stage #0 (2 -> 4): -ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: (p_brand@1 = Brand#21 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("SM CASE") }, Literal { value: Utf8("SM BOX") }, Literal { value: Utf8("SM PACK") }, Literal { value: Utf8("SM PKG") }]) AND p_size@2 <= 5 OR p_brand@1 = Brand#13 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("MED BAG") }, Literal { value: Utf8("MED BOX") }, Literal { value: Utf8("MED PKG") }, Literal { value: Utf8("MED PACK") }]) AND p_size@2 <= 10 OR p_brand@1 = Brand#52 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("LG CASE") }, Literal { value: Utf8("LG BOX") }, Literal { value: Utf8("LG PACK") }, Literal { value: Utf8("LG PKG") }]) AND p_size@2 <= 15) AND p_size@2 >= 1 - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_brand, p_size, p_container], predicate=p_size@5 >= 1 AND (p_brand@3 = Brand#21 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("SM CASE") }, Literal { value: Utf8("SM BOX") }, Literal { value: Utf8("SM PACK") }, Literal { value: Utf8("SM PKG") }]) AND p_size@5 <= 5 OR p_brand@3 = Brand#13 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("MED BAG") }, Literal { value: Utf8("MED BOX") }, Literal { value: Utf8("MED PKG") }, Literal { value: Utf8("MED PACK") }]) AND p_size@5 <= 10 OR p_brand@3 = Brand#52 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("LG CASE") }, Literal { value: Utf8("LG 
BOX") }, Literal { value: Utf8("LG PACK") }, Literal { value: Utf8("LG PKG") }]) AND p_size@5 <= 15), pruning_predicate=CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_max@0 >= 1 END AND (CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#21 AND Brand#21 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM CASE AND SM CASE <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM BOX AND SM BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM PACK AND SM PACK <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM PKG AND SM PKG <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 5 END OR CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#13 AND Brand#13 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED BAG AND MED BAG <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED BOX AND MED BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED PKG AND MED PKG <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED PACK AND MED PACK <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 10 END OR CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#52 AND Brand#52 <= p_brand_max@4 END AND (CASE WHEN 
p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG CASE AND LG CASE <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG BOX AND LG BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG PACK AND LG PACK <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG PKG AND LG PKG <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 15 END), required_guarantees=[] - -Query Stage #1 (4 -> 4): -ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "l_partkey", index: 0 }], 4)) - ProjectionExec: expr=[l_partkey@0 as l_partkey, l_quantity@1 as l_quantity, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: (l_quantity@1 >= Some(800),11,2 AND l_quantity@1 <= Some(1800),11,2 OR l_quantity@1 >= Some(2000),11,2 AND l_quantity@1 <= Some(3000),11,2 OR l_quantity@1 >= Some(3000),11,2 AND l_quantity@1 <= Some(4000),11,2) AND (l_shipmode@5 = AIR OR l_shipmode@5 = AIR REG) AND l_shipinstruct@4 = DELIVER IN PERSON - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_partkey, l_quantity, l_extendedprice, l_discount, l_shipinstruct, l_shipmode], predicate=(l_shipmode@14 = AIR 
OR l_shipmode@14 = AIR REG) AND l_shipinstruct@13 = DELIVER IN PERSON AND (l_quantity@4 >= Some(800),11,2 AND l_quantity@4 <= Some(1800),11,2 OR l_quantity@4 >= Some(2000),11,2 AND l_quantity@4 <= Some(3000),11,2 OR l_quantity@4 >= Some(3000),11,2 AND l_quantity@4 <= Some(4000),11,2), pruning_predicate=(CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= AIR AND AIR <= l_shipmode_max@1 END OR CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= AIR REG AND AIR REG <= l_shipmode_max@1 END) AND CASE WHEN l_shipinstruct_null_count@6 = l_shipinstruct_row_count@7 THEN false ELSE l_shipinstruct_min@4 <= DELIVER IN PERSON AND DELIVER IN PERSON <= l_shipinstruct_max@5 END AND (CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(800),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(1800),11,2 END OR CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(2000),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(3000),11,2 END OR CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(3000),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(4000),11,2 END), required_guarantees=[l_shipmode in (AIR, AIR REG), l_shipinstruct in (DELIVER IN PERSON)] - -Query Stage #2 (4 -> 1): -ShuffleWriterExec(stage_id=2, output_partitioning=Hash([], 4)) - AggregateExec: mode=Partial, gby=[], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] +Query Stage #0 (1 -> 1): +ProjectionExec: expr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@0 as revenue] + AggregateExec: mode=Single, gby=[], aggr=[sum(lineitem.l_extendedprice * Int64(1) - 
lineitem.l_discount)] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, l_partkey@0)], filter=p_brand@1 = Brand#21 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("SM CASE") }, Literal { value: Utf8("SM BOX") }, Literal { value: Utf8("SM PACK") }, Literal { value: Utf8("SM PKG") }]) AND l_quantity@0 >= Some(800),11,2 AND l_quantity@0 <= Some(1800),11,2 AND p_size@2 <= 5 OR p_brand@1 = Brand#13 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("MED BAG") }, Literal { value: Utf8("MED BOX") }, Literal { value: Utf8("MED PKG") }, Literal { value: Utf8("MED PACK") }]) AND l_quantity@0 >= Some(2000),11,2 AND l_quantity@0 <= Some(3000),11,2 AND p_size@2 <= 10 OR p_brand@1 = Brand#52 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("LG CASE") }, Literal { value: Utf8("LG BOX") }, Literal { value: Utf8("LG PACK") }, Literal { value: Utf8("LG PKG") }]) AND l_quantity@0 >= Some(3000),11,2 AND l_quantity@0 <= Some(4000),11,2 AND p_size@2 <= 15, projection=[l_extendedprice@6, l_discount@7] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, l_partkey@0)], filter=p_brand@1 = Brand#21 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("SM CASE") }, Literal { value: Utf8("SM BOX") }, Literal { value: Utf8("SM PACK") }, Literal { value: Utf8("SM PKG") }]) AND l_quantity@0 >= Some(800),11,2 AND l_quantity@0 <= Some(1800),11,2 AND p_size@2 <= 5 OR p_brand@1 = Brand#13 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("MED BAG") }, Literal { value: Utf8("MED BOX") }, Literal { value: Utf8("MED PKG") }, Literal { value: Utf8("MED PACK") }]) AND l_quantity@0 >= Some(2000),11,2 AND l_quantity@0 <= Some(3000),11,2 AND p_size@2 <= 10 OR p_brand@1 = Brand#52 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("LG CASE") }, Literal { 
value: Utf8("LG BOX") }, Literal { value: Utf8("LG PACK") }, Literal { value: Utf8("LG PKG") }]) AND l_quantity@0 >= Some(3000),11,2 AND l_quantity@0 <= Some(4000),11,2 AND p_size@2 <= 15, projection=[l_extendedprice@6, l_discount@7] CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "l_partkey", index: 0 }], 4)) - -Query Stage #3 (1 -> 1): -ProjectionExec: expr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@0 as revenue] - AggregateExec: mode=Final, gby=[], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] - CoalescePartitionsExec - ShuffleReaderExec(stage_id=2, input_partitioning=Hash([], 4)) + FilterExec: (p_brand@1 = Brand#21 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("SM CASE") }, Literal { value: Utf8("SM BOX") }, Literal { value: Utf8("SM PACK") }, Literal { value: Utf8("SM PKG") }]) AND p_size@2 <= 5 OR p_brand@1 = Brand#13 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("MED BAG") }, Literal { value: Utf8("MED BOX") }, Literal { value: Utf8("MED PKG") }, Literal { value: Utf8("MED PACK") }]) AND p_size@2 <= 10 OR p_brand@1 = Brand#52 AND Use p_container@3 IN (SET) ([Literal { value: Utf8("LG CASE") }, Literal { value: Utf8("LG BOX") }, Literal { value: Utf8("LG PACK") }, Literal { value: Utf8("LG PKG") }]) AND p_size@2 <= 15) AND p_size@2 >= 1 + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_brand, p_size, p_container], predicate=p_size@5 >= 1 AND (p_brand@3 = Brand#21 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("SM CASE") }, Literal { value: Utf8("SM BOX") }, Literal { value: Utf8("SM PACK") }, Literal { value: Utf8("SM PKG") }]) AND p_size@5 <= 5 OR p_brand@3 = Brand#13 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("MED BAG") }, Literal { value: Utf8("MED BOX") }, Literal { value: Utf8("MED PKG") }, Literal { value: Utf8("MED PACK") 
}]) AND p_size@5 <= 10 OR p_brand@3 = Brand#52 AND Use p_container@6 IN (SET) ([Literal { value: Utf8("LG CASE") }, Literal { value: Utf8("LG BOX") }, Literal { value: Utf8("LG PACK") }, Literal { value: Utf8("LG PKG") }]) AND p_size@5 <= 15), pruning_predicate=CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_max@0 >= 1 END AND (CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#21 AND Brand#21 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM CASE AND SM CASE <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM BOX AND SM BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM PACK AND SM PACK <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= SM PKG AND SM PKG <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 5 END OR CASE WHEN p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#13 AND Brand#13 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED BAG AND MED BAG <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED BOX AND MED BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED PKG AND MED PKG <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= MED PACK AND MED PACK <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 10 END OR CASE WHEN 
p_brand_null_count@5 = p_brand_row_count@6 THEN false ELSE p_brand_min@3 <= Brand#52 AND Brand#52 <= p_brand_max@4 END AND (CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG CASE AND LG CASE <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG BOX AND LG BOX <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG PACK AND LG PACK <= p_container_max@8 END OR CASE WHEN p_container_null_count@9 = p_container_row_count@10 THEN false ELSE p_container_min@7 <= LG PKG AND LG PKG <= p_container_max@8 END) AND CASE WHEN p_size_null_count@1 = p_size_row_count@2 THEN false ELSE p_size_min@11 <= 15 END), required_guarantees=[] + ProjectionExec: expr=[l_partkey@0 as l_partkey, l_quantity@1 as l_quantity, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: (l_quantity@1 >= Some(800),11,2 AND l_quantity@1 <= Some(1800),11,2 OR l_quantity@1 >= Some(2000),11,2 AND l_quantity@1 <= Some(3000),11,2 OR l_quantity@1 >= Some(3000),11,2 AND l_quantity@1 <= Some(4000),11,2) AND (l_shipmode@5 = AIR OR l_shipmode@5 = AIR REG) AND l_shipinstruct@4 = DELIVER IN PERSON + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208317955]]}, projection=[l_partkey, l_quantity, l_extendedprice, l_discount, l_shipinstruct, l_shipmode], predicate=(l_shipmode@14 = AIR OR l_shipmode@14 = AIR REG) AND l_shipinstruct@13 = DELIVER IN PERSON AND (l_quantity@4 >= Some(800),11,2 AND l_quantity@4 <= Some(1800),11,2 OR l_quantity@4 >= Some(2000),11,2 AND l_quantity@4 <= Some(3000),11,2 OR l_quantity@4 >= Some(3000),11,2 AND l_quantity@4 <= Some(4000),11,2), pruning_predicate=(CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= AIR AND AIR <= 
l_shipmode_max@1 END OR CASE WHEN l_shipmode_null_count@2 = l_shipmode_row_count@3 THEN false ELSE l_shipmode_min@0 <= AIR REG AND AIR REG <= l_shipmode_max@1 END) AND CASE WHEN l_shipinstruct_null_count@6 = l_shipinstruct_row_count@7 THEN false ELSE l_shipinstruct_min@4 <= DELIVER IN PERSON AND DELIVER IN PERSON <= l_shipinstruct_max@5 END AND (CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(800),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(1800),11,2 END OR CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(2000),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(3000),11,2 END OR CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_max@8 >= Some(3000),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@11 <= Some(4000),11,2 END), required_guarantees=[l_shipmode in (AIR, AIR REG), l_shipinstruct in (DELIVER IN PERSON)] diff --git a/testdata/expected-plans/q2.txt b/testdata/expected-plans/q2.txt index fc6e279..bc2c04a 100644 --- a/testdata/expected-plans/q2.txt +++ b/testdata/expected-plans/q2.txt @@ -41,227 +41,93 @@ Limit: skip=0, fetch=100 DataFusion Physical Plan ======================== -GlobalLimitExec: skip=0, fetch=100 - SortPreservingMergeExec: [s_acctbal@0 DESC,n_name@2 ASC NULLS LAST,s_name@1 ASC NULLS LAST,p_partkey@3 ASC NULLS LAST], fetch=100 - SortExec: TopK(fetch=100), expr=[s_acctbal@0 DESC,n_name@2 ASC NULLS LAST,s_name@1 ASC NULLS LAST,p_partkey@3 ASC NULLS LAST], preserve_partitioning=[true] - ProjectionExec: expr=[s_acctbal@5 as s_acctbal, s_name@2 as s_name, n_name@7 as n_name, p_partkey@0 as p_partkey, p_mfgr@1 as p_mfgr, s_address@3 as s_address, s_phone@4 as s_phone, s_comment@6 as s_comment] +SortExec: TopK(fetch=100), 
expr=[s_acctbal@0 DESC,n_name@2 ASC NULLS LAST,s_name@1 ASC NULLS LAST,p_partkey@3 ASC NULLS LAST], preserve_partitioning=[false] + ProjectionExec: expr=[s_acctbal@5 as s_acctbal, s_name@2 as s_name, n_name@7 as n_name, p_partkey@0 as p_partkey, p_mfgr@1 as p_mfgr, s_address@3 as s_address, s_phone@4 as s_phone, s_comment@6 as s_comment] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, ps_partkey@1), (ps_supplycost@7, min(partsupp.ps_supplycost)@0)], projection=[p_partkey@0, p_mfgr@1, s_name@2, s_address@3, s_phone@4, s_acctbal@5, s_comment@6, n_name@8] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, ps_partkey@1), (ps_supplycost@7, min(partsupp.ps_supplycost)@0)], projection=[p_partkey@0, p_mfgr@1, s_name@2, s_address@3, s_phone@4, s_acctbal@5, s_comment@6, n_name@8] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([p_partkey@0, ps_supplycost@7], 4), input_partitions=4 - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(r_regionkey@0, n_regionkey@9)], projection=[p_partkey@1, p_mfgr@2, s_name@3, s_address@4, s_phone@5, s_acctbal@6, s_comment@7, ps_supplycost@8, n_name@9] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([r_regionkey@0], 4), input_partitions=4 - RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 - ProjectionExec: expr=[r_regionkey@0 as r_regionkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: r_name@1 = ASIA - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet/part1.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = ASIA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= ASIA AND ASIA <= r_name_max@1 END, required_guarantees=[r_name in (ASIA)] + 
HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(r_regionkey@0, n_regionkey@9)], projection=[p_partkey@1, p_mfgr@2, s_name@3, s_address@4, s_phone@5, s_acctbal@6, s_comment@7, ps_supplycost@8, n_name@9] + ProjectionExec: expr=[r_regionkey@0 as r_regionkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: r_name@1 = ASIA + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = ASIA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= ASIA AND ASIA <= r_name_max@1 END, required_guarantees=[r_name in (ASIA)] + ProjectionExec: expr=[p_partkey@2 as p_partkey, p_mfgr@3 as p_mfgr, s_name@4 as s_name, s_address@5 as s_address, s_phone@6 as s_phone, s_acctbal@7 as s_acctbal, s_comment@8 as s_comment, ps_supplycost@9 as ps_supplycost, n_name@0 as n_name, n_regionkey@1 as n_regionkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@4)], projection=[n_name@1, n_regionkey@2, p_partkey@3, p_mfgr@4, s_name@5, s_address@6, s_phone@8, s_acctbal@9, s_comment@10, ps_supplycost@11] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name, n_regionkey] + ProjectionExec: expr=[p_partkey@6 as p_partkey, p_mfgr@7 as p_mfgr, s_name@0 as s_name, s_address@1 as s_address, s_nationkey@2 as s_nationkey, s_phone@3 as s_phone, s_acctbal@4 as s_acctbal, s_comment@5 as s_comment, ps_supplycost@8 as ps_supplycost] CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([n_regionkey@9], 4), input_partitions=4 - ProjectionExec: expr=[p_partkey@2 as p_partkey, p_mfgr@3 as p_mfgr, s_name@4 as s_name, s_address@5 as s_address, s_phone@6 as s_phone, s_acctbal@7 as s_acctbal, s_comment@8 as s_comment, ps_supplycost@9 as ps_supplycost, n_name@0 as 
n_name, n_regionkey@1 as n_regionkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@4)], projection=[n_name@1, n_regionkey@2, p_partkey@3, p_mfgr@4, s_name@5, s_address@6, s_phone@8, s_acctbal@9, s_comment@10, ps_supplycost@11] + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@2)], projection=[s_name@1, s_address@2, s_nationkey@3, s_phone@4, s_acctbal@5, s_comment@6, p_partkey@7, p_mfgr@8, ps_supplycost@10] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, ps_partkey@0)], projection=[p_partkey@0, p_mfgr@1, ps_suppkey@3, ps_supplycost@4] + ProjectionExec: expr=[p_partkey@0 as p_partkey, p_mfgr@1 as p_mfgr] CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=1 - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name, n_regionkey] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([s_nationkey@4], 4), input_partitions=4 - ProjectionExec: expr=[p_partkey@6 as p_partkey, p_mfgr@7 as p_mfgr, s_name@0 as s_name, s_address@1 as s_address, s_nationkey@2 as s_nationkey, s_phone@3 as s_phone, s_acctbal@4 as s_acctbal, s_comment@5 as s_comment, ps_supplycost@8 as ps_supplycost] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@2)], projection=[s_name@1, s_address@2, s_nationkey@3, s_phone@4, s_acctbal@5, s_comment@6, p_partkey@7, p_mfgr@8, ps_supplycost@10] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: 
partitioning=Hash([s_suppkey@0], 4), input_partitions=4 - RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([ps_suppkey@2], 4), input_partitions=4 - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, ps_partkey@0)], projection=[p_partkey@0, p_mfgr@1, ps_suppkey@3, ps_supplycost@4] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([p_partkey@0], 4), input_partitions=4 - ProjectionExec: expr=[p_partkey@0 as p_partkey, p_mfgr@1 as p_mfgr] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: p_size@3 = 48 AND p_type@2 LIKE %TIN - RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_mfgr, p_type, p_size], predicate=p_size@5 = 48 AND p_type@4 LIKE %TIN, pruning_predicate=CASE WHEN p_size_null_count@2 = p_size_row_count@3 THEN false ELSE p_size_min@0 <= 48 AND 48 <= p_size_max@1 END, required_guarantees=[p_size in (48)] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([ps_partkey@0], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10445614], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:10445614..20890506, 
home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..722], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:722..10446336], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:10446336..20891947]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] + FilterExec: p_size@3 = 48 AND p_type@2 LIKE %TIN + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_mfgr, p_type, p_size], predicate=p_size@5 = 48 AND p_type@4 LIKE %TIN, pruning_predicate=CASE WHEN p_size_null_count@2 = p_size_row_count@3 THEN false ELSE p_size_min@0 <= 48 AND 48 <= p_size_max@1 END, required_guarantees=[p_size in (48)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet:0..41091907]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] + ProjectionExec: expr=[min(partsupp.ps_supplycost)@1 as min(partsupp.ps_supplycost), ps_partkey@0 as ps_partkey] + AggregateExec: mode=Single, gby=[ps_partkey@0 as ps_partkey], aggr=[min(partsupp.ps_supplycost)] CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([ps_partkey@1, min(partsupp.ps_supplycost)@0], 4), input_partitions=4 - ProjectionExec: expr=[min(partsupp.ps_supplycost)@1 as min(partsupp.ps_supplycost), ps_partkey@0 as ps_partkey] - AggregateExec: mode=FinalPartitioned, gby=[ps_partkey@0 as ps_partkey], aggr=[min(partsupp.ps_supplycost)] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([ps_partkey@0], 4), input_partitions=4 - AggregateExec: mode=Partial, gby=[ps_partkey@0 as ps_partkey], aggr=[min(partsupp.ps_supplycost)] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(r_regionkey@0, n_regionkey@2)], projection=[ps_partkey@1, ps_supplycost@2] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: 
partitioning=Hash([r_regionkey@0], 4), input_partitions=4 - RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 - ProjectionExec: expr=[r_regionkey@0 as r_regionkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: r_name@1 = ASIA - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet/part1.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = ASIA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= ASIA AND ASIA <= r_name_max@1 END, required_guarantees=[r_name in (ASIA)] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([n_regionkey@2], 4), input_partitions=4 - ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_supplycost@2 as ps_supplycost, n_regionkey@0 as n_regionkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[n_regionkey@1, ps_partkey@2, ps_supplycost@3] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=1 - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_regionkey] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([s_nationkey@2], 4), input_partitions=4 - ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_supplycost@2 as ps_supplycost, s_nationkey@0 as s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@1)], projection=[s_nationkey@1, ps_partkey@2, ps_supplycost@4] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=4 - RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 - ParquetExec: file_groups={2 groups: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([ps_suppkey@1], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10445614], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:10445614..20890506, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..722], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:722..10446336], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:10446336..20891947]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(r_regionkey@0, n_regionkey@2)], projection=[ps_partkey@1, ps_supplycost@2] + ProjectionExec: expr=[r_regionkey@0 as r_regionkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: r_name@1 = ASIA + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = ASIA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= ASIA AND ASIA <= r_name_max@1 END, required_guarantees=[r_name in (ASIA)] + ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_supplycost@2 as ps_supplycost, n_regionkey@0 as n_regionkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[n_regionkey@1, ps_partkey@2, ps_supplycost@3] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_regionkey] + 
ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_supplycost@2 as ps_supplycost, s_nationkey@0 as s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@1)], projection=[s_nationkey@1, ps_partkey@2, ps_supplycost@4] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet:0..41091907]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] RaySQL Plan =========== -Query Stage #0 (1 -> 4): -ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "r_regionkey", index: 0 }], 4)) - ProjectionExec: expr=[r_regionkey@0 as r_regionkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: r_name@1 = ASIA - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet/part1.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = ASIA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= ASIA AND ASIA <= r_name_max@1 END, required_guarantees=[r_name in (ASIA)] - -Query Stage #1 (1 -> 4): -ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name, n_regionkey] - -Query Stage #2 (2 -> 4): -ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, 
s_comment] - -Query Stage #3 (2 -> 4): -ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) - ProjectionExec: expr=[p_partkey@0 as p_partkey, p_mfgr@1 as p_mfgr] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: p_size@3 = 48 AND p_type@2 LIKE %TIN - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_mfgr, p_type, p_size], predicate=p_size@5 = 48 AND p_type@4 LIKE %TIN, pruning_predicate=CASE WHEN p_size_null_count@2 = p_size_row_count@3 THEN false ELSE p_size_min@0 <= 48 AND 48 <= p_size_max@1 END, required_guarantees=[p_size in (48)] - -Query Stage #4 (4 -> 4): -ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10445614], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:10445614..20890506, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..722], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:722..10446336], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:10446336..20891947]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] - -Query Stage #5 (4 -> 4): -ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "ps_suppkey", index: 2 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, ps_partkey@0)], projection=[p_partkey@0, p_mfgr@1, ps_suppkey@3, ps_supplycost@4] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) - CoalesceBatchesExec: 
target_batch_size=8192 - ShuffleReaderExec(stage_id=4, input_partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 4)) - -Query Stage #6 (4 -> 4): -ShuffleWriterExec(stage_id=6, output_partitioning=Hash([Column { name: "s_nationkey", index: 4 }], 4)) - ProjectionExec: expr=[p_partkey@6 as p_partkey, p_mfgr@7 as p_mfgr, s_name@0 as s_name, s_address@1 as s_address, s_nationkey@2 as s_nationkey, s_phone@3 as s_phone, s_acctbal@4 as s_acctbal, s_comment@5 as s_comment, ps_supplycost@8 as ps_supplycost] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@2)], projection=[s_name@1, s_address@2, s_nationkey@3, s_phone@4, s_acctbal@5, s_comment@6, p_partkey@7, p_mfgr@8, ps_supplycost@10] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=5, input_partitioning=Hash([Column { name: "ps_suppkey", index: 2 }], 4)) - -Query Stage #7 (4 -> 4): -ShuffleWriterExec(stage_id=7, output_partitioning=Hash([Column { name: "n_regionkey", index: 9 }], 4)) - ProjectionExec: expr=[p_partkey@2 as p_partkey, p_mfgr@3 as p_mfgr, s_name@4 as s_name, s_address@5 as s_address, s_phone@6 as s_phone, s_acctbal@7 as s_acctbal, s_comment@8 as s_comment, ps_supplycost@9 as ps_supplycost, n_name@0 as n_name, n_regionkey@1 as n_regionkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@4)], projection=[n_name@1, n_regionkey@2, p_partkey@3, p_mfgr@4, s_name@5, s_address@6, s_phone@8, s_acctbal@9, s_comment@10, ps_supplycost@11] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=6, 
input_partitioning=Hash([Column { name: "s_nationkey", index: 4 }], 4)) - -Query Stage #8 (4 -> 4): -ShuffleWriterExec(stage_id=8, output_partitioning=Hash([Column { name: "p_partkey", index: 0 }, Column { name: "ps_supplycost", index: 7 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(r_regionkey@0, n_regionkey@9)], projection=[p_partkey@1, p_mfgr@2, s_name@3, s_address@4, s_phone@5, s_acctbal@6, s_comment@7, ps_supplycost@8, n_name@9] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "r_regionkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=7, input_partitioning=Hash([Column { name: "n_regionkey", index: 9 }], 4)) - -Query Stage #9 (1 -> 4): -ShuffleWriterExec(stage_id=9, output_partitioning=Hash([Column { name: "r_regionkey", index: 0 }], 4)) - ProjectionExec: expr=[r_regionkey@0 as r_regionkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: r_name@1 = ASIA - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet/part1.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = ASIA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= ASIA AND ASIA <= r_name_max@1 END, required_guarantees=[r_name in (ASIA)] - -Query Stage #10 (1 -> 4): -ShuffleWriterExec(stage_id=10, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_regionkey] - -Query Stage #11 (2 -> 4): -ShuffleWriterExec(stage_id=11, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], 
[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] - -Query Stage #12 (4 -> 4): -ShuffleWriterExec(stage_id=12, output_partitioning=Hash([Column { name: "ps_suppkey", index: 1 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10445614], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:10445614..20890506, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..722], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:722..10446336], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:10446336..20891947]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] - -Query Stage #13 (4 -> 4): -ShuffleWriterExec(stage_id=13, output_partitioning=Hash([Column { name: "s_nationkey", index: 2 }], 4)) - ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_supplycost@2 as ps_supplycost, s_nationkey@0 as s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@1)], projection=[s_nationkey@1, ps_partkey@2, ps_supplycost@4] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=11, input_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=12, input_partitioning=Hash([Column { name: "ps_suppkey", index: 1 }], 4)) - -Query Stage #14 (4 -> 4): -ShuffleWriterExec(stage_id=14, output_partitioning=Hash([Column { name: "n_regionkey", index: 2 }], 4)) - ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_supplycost@2 as ps_supplycost, n_regionkey@0 as n_regionkey] +Query Stage #0 (1 -> 1): +SortExec: TopK(fetch=100), expr=[s_acctbal@0 DESC,n_name@2 ASC NULLS LAST,s_name@1 ASC NULLS LAST,p_partkey@3 ASC NULLS LAST], 
preserve_partitioning=[false] + ProjectionExec: expr=[s_acctbal@5 as s_acctbal, s_name@2 as s_name, n_name@7 as n_name, p_partkey@0 as p_partkey, p_mfgr@1 as p_mfgr, s_address@3 as s_address, s_phone@4 as s_phone, s_comment@6 as s_comment] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[n_regionkey@1, ps_partkey@2, ps_supplycost@3] + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, ps_partkey@1), (ps_supplycost@7, min(partsupp.ps_supplycost)@0)], projection=[p_partkey@0, p_mfgr@1, s_name@2, s_address@3, s_phone@4, s_acctbal@5, s_comment@6, n_name@8] CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=10, input_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=13, input_partitioning=Hash([Column { name: "s_nationkey", index: 2 }], 4)) - -Query Stage #15 (4 -> 4): -ShuffleWriterExec(stage_id=15, output_partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 4)) - AggregateExec: mode=Partial, gby=[ps_partkey@0 as ps_partkey], aggr=[min(partsupp.ps_supplycost)] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(r_regionkey@0, n_regionkey@2)], projection=[ps_partkey@1, ps_supplycost@2] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=9, input_partitioning=Hash([Column { name: "r_regionkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=14, input_partitioning=Hash([Column { name: "n_regionkey", index: 2 }], 4)) - -Query Stage #16 (4 -> 4): -ShuffleWriterExec(stage_id=16, output_partitioning=Hash([Column { name: "ps_partkey", index: 1 }, Column { name: "min(partsupp.ps_supplycost)", index: 0 }], 4)) - ProjectionExec: expr=[min(partsupp.ps_supplycost)@1 as min(partsupp.ps_supplycost), ps_partkey@0 as ps_partkey] - 
AggregateExec: mode=FinalPartitioned, gby=[ps_partkey@0 as ps_partkey], aggr=[min(partsupp.ps_supplycost)] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=15, input_partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 4)) - -Query Stage #17 (4 -> 4): -ShuffleWriterExec(stage_id=17, output_partitioning=Hash([Column { name: "p_partkey", index: 3 }], 4)) - SortExec: TopK(fetch=100), expr=[s_acctbal@0 DESC,n_name@2 ASC NULLS LAST,s_name@1 ASC NULLS LAST,p_partkey@3 ASC NULLS LAST], preserve_partitioning=[true] - ProjectionExec: expr=[s_acctbal@5 as s_acctbal, s_name@2 as s_name, n_name@7 as n_name, p_partkey@0 as p_partkey, p_mfgr@1 as p_mfgr, s_address@3 as s_address, s_phone@4 as s_phone, s_comment@6 as s_comment] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, ps_partkey@1), (ps_supplycost@7, min(partsupp.ps_supplycost)@0)], projection=[p_partkey@0, p_mfgr@1, s_name@2, s_address@3, s_phone@4, s_acctbal@5, s_comment@6, n_name@8] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=8, input_partitioning=Hash([Column { name: "p_partkey", index: 0 }, Column { name: "ps_supplycost", index: 7 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=16, input_partitioning=Hash([Column { name: "ps_partkey", index: 1 }, Column { name: "min(partsupp.ps_supplycost)", index: 0 }], 4)) - -Query Stage #18 (1 -> 1): -GlobalLimitExec: skip=0, fetch=100 - SortPreservingMergeExec: [s_acctbal@0 DESC,n_name@2 ASC NULLS LAST,s_name@1 ASC NULLS LAST,p_partkey@3 ASC NULLS LAST], fetch=100 - ShuffleReaderExec(stage_id=17, input_partitioning=Hash([Column { name: "p_partkey", index: 3 }], 4)) + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(r_regionkey@0, n_regionkey@9)], projection=[p_partkey@1, p_mfgr@2, s_name@3, s_address@4, s_phone@5, s_acctbal@6, s_comment@7, ps_supplycost@8, n_name@9] + ProjectionExec: expr=[r_regionkey@0 as 
r_regionkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: r_name@1 = ASIA + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = ASIA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= ASIA AND ASIA <= r_name_max@1 END, required_guarantees=[r_name in (ASIA)] + ProjectionExec: expr=[p_partkey@2 as p_partkey, p_mfgr@3 as p_mfgr, s_name@4 as s_name, s_address@5 as s_address, s_phone@6 as s_phone, s_acctbal@7 as s_acctbal, s_comment@8 as s_comment, ps_supplycost@9 as ps_supplycost, n_name@0 as n_name, n_regionkey@1 as n_regionkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@4)], projection=[n_name@1, n_regionkey@2, p_partkey@3, p_mfgr@4, s_name@5, s_address@6, s_phone@8, s_acctbal@9, s_comment@10, ps_supplycost@11] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name, n_regionkey] + ProjectionExec: expr=[p_partkey@6 as p_partkey, p_mfgr@7 as p_mfgr, s_name@0 as s_name, s_address@1 as s_address, s_nationkey@2 as s_nationkey, s_phone@3 as s_phone, s_acctbal@4 as s_acctbal, s_comment@5 as s_comment, ps_supplycost@8 as ps_supplycost] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@2)], projection=[s_name@1, s_address@2, s_nationkey@3, s_phone@4, s_acctbal@5, s_comment@6, p_partkey@7, p_mfgr@8, ps_supplycost@10] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, ps_partkey@0)], 
projection=[p_partkey@0, p_mfgr@1, ps_suppkey@3, ps_supplycost@4] + ProjectionExec: expr=[p_partkey@0 as p_partkey, p_mfgr@1 as p_mfgr] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: p_size@3 = 48 AND p_type@2 LIKE %TIN + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_mfgr, p_type, p_size], predicate=p_size@5 = 48 AND p_type@4 LIKE %TIN, pruning_predicate=CASE WHEN p_size_null_count@2 = p_size_row_count@3 THEN false ELSE p_size_min@0 <= 48 AND 48 <= p_size_max@1 END, required_guarantees=[p_size in (48)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet:0..41091907]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] + ProjectionExec: expr=[min(partsupp.ps_supplycost)@1 as min(partsupp.ps_supplycost), ps_partkey@0 as ps_partkey] + AggregateExec: mode=Single, gby=[ps_partkey@0 as ps_partkey], aggr=[min(partsupp.ps_supplycost)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(r_regionkey@0, n_regionkey@2)], projection=[ps_partkey@1, ps_supplycost@2] + ProjectionExec: expr=[r_regionkey@0 as r_regionkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: r_name@1 = ASIA + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = ASIA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= ASIA AND ASIA <= r_name_max@1 END, required_guarantees=[r_name in (ASIA)] + ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_supplycost@2 as ps_supplycost, n_regionkey@0 as n_regionkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[n_regionkey@1, ps_partkey@2, ps_supplycost@3] + ParquetExec: file_groups={1 group: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_regionkey] + ProjectionExec: expr=[ps_partkey@1 as ps_partkey, ps_supplycost@2 as ps_supplycost, s_nationkey@0 as s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, ps_suppkey@1)], projection=[s_nationkey@1, ps_partkey@2, ps_supplycost@4] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet:0..41091907]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] diff --git a/testdata/expected-plans/q20.txt b/testdata/expected-plans/q20.txt index d977fac..5927d2f 100644 --- a/testdata/expected-plans/q20.txt +++ b/testdata/expected-plans/q20.txt @@ -29,126 +29,59 @@ Sort: supplier.s_name ASC NULLS LAST DataFusion Physical Plan ======================== -SortPreservingMergeExec: [s_name@0 ASC NULLS LAST] - SortExec: expr=[s_name@0 ASC NULLS LAST], preserve_partitioning=[true] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=LeftSemi, on=[(s_suppkey@0, ps_suppkey@0)], projection=[s_name@1, s_address@2] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=4 +SortExec: expr=[s_name@0 ASC NULLS LAST], preserve_partitioning=[false] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=LeftSemi, on=[(s_suppkey@0, ps_suppkey@0)], projection=[s_name@1, s_address@2] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@3)], projection=[s_suppkey@1, s_name@2, s_address@3] + ProjectionExec: expr=[n_nationkey@0 as n_nationkey] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: 
mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@3)], projection=[s_suppkey@1, s_name@2, s_address@3] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=4 - RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 - ProjectionExec: expr=[n_nationkey@0 as n_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: n_name@1 = KENYA - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = KENYA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= KENYA AND KENYA <= n_name_max@1 END, required_guarantees=[n_name in (KENYA)] + FilterExec: n_name@1 = KENYA + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = KENYA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= KENYA AND KENYA <= n_name_max@1 END, required_guarantees=[n_name in (KENYA)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_name, s_address, s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(ps_partkey@0, l_partkey@1), (ps_suppkey@1, l_suppkey@2)], filter=CAST(ps_availqty@0 AS Float64) > Float64(0.5) * sum(lineitem.l_quantity)@1, projection=[ps_suppkey@1] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=RightSemi, on=[(p_partkey@0, ps_partkey@0)] + ProjectionExec: expr=[p_partkey@0 as p_partkey] CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([s_nationkey@3], 4), input_partitions=4 - RepartitionExec: partitioning=RoundRobinBatch(4), 
input_partitions=2 - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_name, s_address, s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([ps_suppkey@0], 4), input_partitions=4 - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(ps_partkey@0, l_partkey@1), (ps_suppkey@1, l_suppkey@2)], filter=CAST(ps_availqty@0 AS Float64) > Float64(0.5) * sum(lineitem.l_quantity)@1, projection=[ps_suppkey@1] + FilterExec: p_name@1 LIKE blanched% + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_name], predicate=p_name@1 LIKE blanched% + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet:0..41091907]]}, projection=[ps_partkey, ps_suppkey, ps_availqty] + ProjectionExec: expr=[0.5 * CAST(sum(lineitem.l_quantity)@2 AS Float64) as Float64(0.5) * sum(lineitem.l_quantity), l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey] + AggregateExec: mode=Single, gby=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey], aggr=[sum(lineitem.l_quantity)] + ProjectionExec: expr=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey, l_quantity@2 as l_quantity] CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([ps_partkey@0, ps_suppkey@1], 4), input_partitions=4 - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=RightSemi, on=[(p_partkey@0, ps_partkey@0)] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([p_partkey@0], 4), input_partitions=4 - ProjectionExec: expr=[p_partkey@0 as p_partkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: p_name@1 LIKE blanched% - RepartitionExec: 
partitioning=RoundRobinBatch(4), input_partitions=2 - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_name], predicate=p_name@1 LIKE blanched% - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([ps_partkey@0], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10445614], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:10445614..20890506, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..722], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:722..10446336], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:10446336..20891947]]}, projection=[ps_partkey, ps_suppkey, ps_availqty] - ProjectionExec: expr=[0.5 * CAST(sum(lineitem.l_quantity)@2 AS Float64) as Float64(0.5) * sum(lineitem.l_quantity), l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey] - AggregateExec: mode=FinalPartitioned, gby=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey], aggr=[sum(lineitem.l_quantity)] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([l_partkey@0, l_suppkey@1], 4), input_partitions=4 - AggregateExec: mode=Partial, gby=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey], aggr=[sum(lineitem.l_quantity)] - ProjectionExec: expr=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey, l_quantity@2 as l_quantity] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_shipdate@3 >= 1993-01-01 AND l_shipdate@3 < 1994-01-01 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], 
[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_partkey, l_suppkey, l_quantity, l_shipdate], predicate=l_shipdate@10 >= 1993-01-01 AND l_shipdate@10 < 1994-01-01, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1993-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1994-01-01 END, required_guarantees=[] + FilterExec: l_shipdate@3 >= 1993-01-01 AND l_shipdate@3 < 1994-01-01 + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208317955]]}, projection=[l_partkey, l_suppkey, l_quantity, l_shipdate], predicate=l_shipdate@10 >= 1993-01-01 AND l_shipdate@10 < 1994-01-01, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1993-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1994-01-01 END, required_guarantees=[] RaySQL Plan =========== -Query Stage #0 (1 -> 4): -ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) - ProjectionExec: expr=[n_nationkey@0 as n_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: n_name@1 = KENYA - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = KENYA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= KENYA AND KENYA <= n_name_max@1 
END, required_guarantees=[n_name in (KENYA)] - -Query Stage #1 (2 -> 4): -ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "s_nationkey", index: 3 }], 4)) - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_name, s_address, s_nationkey] - -Query Stage #2 (4 -> 4): -ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@3)], projection=[s_suppkey@1, s_name@2, s_address@3] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "s_nationkey", index: 3 }], 4)) - -Query Stage #3 (2 -> 4): -ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) - ProjectionExec: expr=[p_partkey@0 as p_partkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: p_name@1 LIKE blanched% - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_name], predicate=p_name@1 LIKE blanched% - -Query Stage #4 (4 -> 4): -ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10445614], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:10445614..20890506, 
home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..722], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:722..10446336], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:10446336..20891947]]}, projection=[ps_partkey, ps_suppkey, ps_availqty] - -Query Stage #5 (4 -> 4): -ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "ps_partkey", index: 0 }, Column { name: "ps_suppkey", index: 1 }], 4)) +Query Stage #0 (1 -> 1): +SortExec: expr=[s_name@0 ASC NULLS LAST], preserve_partitioning=[false] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=RightSemi, on=[(p_partkey@0, ps_partkey@0)] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) + HashJoinExec: mode=CollectLeft, join_type=LeftSemi, on=[(s_suppkey@0, ps_suppkey@0)], projection=[s_name@1, s_address@2] CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=4, input_partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 4)) - -Query Stage #6 (4 -> 4): -ShuffleWriterExec(stage_id=6, output_partitioning=Hash([Column { name: "l_partkey", index: 0 }, Column { name: "l_suppkey", index: 1 }], 4)) - AggregateExec: mode=Partial, gby=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey], aggr=[sum(lineitem.l_quantity)] - ProjectionExec: expr=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey, l_quantity@2 as l_quantity] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_shipdate@3 >= 1993-01-01 AND l_shipdate@3 < 1994-01-01 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, 
home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_partkey, l_suppkey, l_quantity, l_shipdate], predicate=l_shipdate@10 >= 1993-01-01 AND l_shipdate@10 < 1994-01-01, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1993-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1994-01-01 END, required_guarantees=[] - -Query Stage #7 (4 -> 4): -ShuffleWriterExec(stage_id=7, output_partitioning=Hash([Column { name: "ps_suppkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(ps_partkey@0, l_partkey@1), (ps_suppkey@1, l_suppkey@2)], filter=CAST(ps_availqty@0 AS Float64) > Float64(0.5) * sum(lineitem.l_quantity)@1, projection=[ps_suppkey@1] + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@3)], projection=[s_suppkey@1, s_name@2, s_address@3] + ProjectionExec: expr=[n_nationkey@0 as n_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: n_name@1 = KENYA + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = KENYA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= KENYA AND KENYA <= n_name_max@1 END, required_guarantees=[n_name in (KENYA)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_name, s_address, s_nationkey] CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=5, input_partitioning=Hash([Column { name: 
"ps_partkey", index: 0 }, Column { name: "ps_suppkey", index: 1 }], 4)) - ProjectionExec: expr=[0.5 * CAST(sum(lineitem.l_quantity)@2 AS Float64) as Float64(0.5) * sum(lineitem.l_quantity), l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey] - AggregateExec: mode=FinalPartitioned, gby=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey], aggr=[sum(lineitem.l_quantity)] + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(ps_partkey@0, l_partkey@1), (ps_suppkey@1, l_suppkey@2)], filter=CAST(ps_availqty@0 AS Float64) > Float64(0.5) * sum(lineitem.l_quantity)@1, projection=[ps_suppkey@1] CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=6, input_partitioning=Hash([Column { name: "l_partkey", index: 0 }, Column { name: "l_suppkey", index: 1 }], 4)) - -Query Stage #8 (4 -> 1): -ShuffleWriterExec(stage_id=8, output_partitioning=Hash([], 4)) - SortExec: expr=[s_name@0 ASC NULLS LAST], preserve_partitioning=[true] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=LeftSemi, on=[(s_suppkey@0, ps_suppkey@0)], projection=[s_name@1, s_address@2] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=7, input_partitioning=Hash([Column { name: "ps_suppkey", index: 0 }], 4)) - -Query Stage #9 (4 -> 1): -SortPreservingMergeExec: [s_name@0 ASC NULLS LAST] - ShuffleReaderExec(stage_id=8, input_partitioning=Hash([], 4)) + HashJoinExec: mode=CollectLeft, join_type=RightSemi, on=[(p_partkey@0, ps_partkey@0)] + ProjectionExec: expr=[p_partkey@0 as p_partkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: p_name@1 LIKE blanched% + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_name], predicate=p_name@1 LIKE blanched% + ParquetExec: file_groups={1 group: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet:0..41091907]]}, projection=[ps_partkey, ps_suppkey, ps_availqty] + ProjectionExec: expr=[0.5 * CAST(sum(lineitem.l_quantity)@2 AS Float64) as Float64(0.5) * sum(lineitem.l_quantity), l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey] + AggregateExec: mode=Single, gby=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey], aggr=[sum(lineitem.l_quantity)] + ProjectionExec: expr=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey, l_quantity@2 as l_quantity] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: l_shipdate@3 >= 1993-01-01 AND l_shipdate@3 < 1994-01-01 + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208317955]]}, projection=[l_partkey, l_suppkey, l_quantity, l_shipdate], predicate=l_shipdate@10 >= 1993-01-01 AND l_shipdate@10 < 1994-01-01, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1993-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1994-01-01 END, required_guarantees=[] diff --git a/testdata/expected-plans/q21.txt b/testdata/expected-plans/q21.txt index b5e8809..ce41d3d 100644 --- a/testdata/expected-plans/q21.txt +++ b/testdata/expected-plans/q21.txt @@ -37,153 +37,71 @@ Limit: skip=0, fetch=100 DataFusion Physical Plan ======================== -GlobalLimitExec: skip=0, fetch=100 - SortPreservingMergeExec: [numwait@1 DESC,s_name@0 ASC NULLS LAST], fetch=100 - SortExec: TopK(fetch=100), expr=[numwait@1 DESC,s_name@0 ASC NULLS LAST], preserve_partitioning=[true] - ProjectionExec: expr=[s_name@0 as s_name, count(*)@1 as numwait] - AggregateExec: mode=FinalPartitioned, gby=[s_name@0 as s_name], aggr=[count(*)] +SortExec: TopK(fetch=100), expr=[numwait@1 DESC,s_name@0 ASC NULLS LAST], preserve_partitioning=[false] + ProjectionExec: expr=[s_name@0 as s_name, count(*)@1 as numwait] + 
AggregateExec: mode=Single, gby=[s_name@0 as s_name], aggr=[count(*)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=LeftAnti, on=[(l_orderkey@1, l_orderkey@0)], filter=l_suppkey@1 != l_suppkey@0, projection=[s_name@0] CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([s_name@0], 4), input_partitions=4 - AggregateExec: mode=Partial, gby=[s_name@0 as s_name], aggr=[count(*)] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=LeftAnti, on=[(l_orderkey@1, l_orderkey@0)], filter=l_suppkey@1 != l_suppkey@0, projection=[s_name@0] + HashJoinExec: mode=CollectLeft, join_type=LeftSemi, on=[(l_orderkey@1, l_orderkey@0)], filter=l_suppkey@1 != l_suppkey@0 + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@1)], projection=[s_name@1, l_orderkey@3, l_suppkey@4] + ProjectionExec: expr=[n_nationkey@0 as n_nationkey] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=LeftSemi, on=[(l_orderkey@1, l_orderkey@0)], filter=l_suppkey@1 != l_suppkey@0 + FilterExec: n_name@1 = ARGENTINA + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ARGENTINA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ARGENTINA AND ARGENTINA <= n_name_max@1 END, required_guarantees=[n_name in (ARGENTINA)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(o_orderkey@0, l_orderkey@2)], projection=[s_name@1, s_nationkey@2, l_orderkey@3, l_suppkey@4] + ProjectionExec: expr=[o_orderkey@0 as o_orderkey] CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([l_orderkey@1], 4), input_partitions=4 + FilterExec: o_orderstatus@1 = F + ParquetExec: 
file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54530383]]}, projection=[o_orderkey, o_orderstatus], predicate=o_orderstatus@2 = F, pruning_predicate=CASE WHEN o_orderstatus_null_count@2 = o_orderstatus_row_count@3 THEN false ELSE o_orderstatus_min@0 <= F AND F <= o_orderstatus_max@1 END, required_guarantees=[o_orderstatus in (F)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1)], projection=[s_name@1, s_nationkey@2, l_orderkey@3, l_suppkey@4] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_name, s_nationkey] + ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_suppkey@1 as l_suppkey] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@1)], projection=[s_name@1, l_orderkey@3, l_suppkey@4] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=4 - RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 - ProjectionExec: expr=[n_nationkey@0 as n_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: n_name@1 = ARGENTINA - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ARGENTINA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ARGENTINA AND ARGENTINA <= n_name_max@1 END, required_guarantees=[n_name in (ARGENTINA)] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([s_nationkey@1], 4), input_partitions=4 - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@0, l_orderkey@2)], projection=[s_name@1, s_nationkey@2, 
l_orderkey@3, l_suppkey@4] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([o_orderkey@0], 4), input_partitions=4 - ProjectionExec: expr=[o_orderkey@0 as o_orderkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: o_orderstatus@1 = F - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13634779], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13634779..27269558], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27269558..27275584, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13628753], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13628753..27263530]]}, projection=[o_orderkey, o_orderstatus], predicate=o_orderstatus@2 = F, pruning_predicate=CASE WHEN o_orderstatus_null_count@2 = o_orderstatus_row_count@3 THEN false ELSE o_orderstatus_min@0 <= F AND F <= o_orderstatus_max@1 END, required_guarantees=[o_orderstatus in (F)] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([l_orderkey@2], 4), input_partitions=4 - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1)], projection=[s_name@1, s_nationkey@2, l_orderkey@3, l_suppkey@4] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=4 - RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_name, s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([l_suppkey@1], 4), input_partitions=4 - ProjectionExec: 
expr=[l_orderkey@0 as l_orderkey, l_suppkey@1 as l_suppkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_receiptdate@3 > l_commitdate@2 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_orderkey, l_suppkey] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4 - ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_suppkey@1 as l_suppkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_receiptdate@3 > l_commitdate@2 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], 
[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 + FilterExec: l_receiptdate@3 > l_commitdate@2 + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208317955]]}, projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208317955]]}, projection=[l_orderkey, l_suppkey] + ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_suppkey@1 as l_suppkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: l_receiptdate@3 > l_commitdate@2 + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208317955]]}, projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 RaySQL Plan =========== -Query Stage #0 (1 -> 4): -ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) - ProjectionExec: expr=[n_nationkey@0 as n_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: n_name@1 = ARGENTINA - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ARGENTINA, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= ARGENTINA AND ARGENTINA <= n_name_max@1 END, 
required_guarantees=[n_name in (ARGENTINA)] - -Query Stage #1 (4 -> 4): -ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) - ProjectionExec: expr=[o_orderkey@0 as o_orderkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: o_orderstatus@1 = F - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13634779], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13634779..27269558], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27269558..27275584, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13628753], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13628753..27263530]]}, projection=[o_orderkey, o_orderstatus], predicate=o_orderstatus@2 = F, pruning_predicate=CASE WHEN o_orderstatus_null_count@2 = o_orderstatus_row_count@3 THEN false ELSE o_orderstatus_min@0 <= F AND F <= o_orderstatus_max@1 END, required_guarantees=[o_orderstatus in (F)] - -Query Stage #2 (2 -> 4): -ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_name, s_nationkey] - -Query Stage #3 (4 -> 4): -ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "l_suppkey", index: 1 }], 4)) - ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_suppkey@1 as l_suppkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_receiptdate@3 > l_commitdate@2 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], 
[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 - -Query Stage #4 (4 -> 4): -ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "l_orderkey", index: 2 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1)], projection=[s_name@1, s_nationkey@2, l_orderkey@3, l_suppkey@4] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "l_suppkey", index: 1 }], 4)) - -Query Stage #5 (4 -> 4): -ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "s_nationkey", index: 1 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@0, l_orderkey@2)], projection=[s_name@1, s_nationkey@2, l_orderkey@3, l_suppkey@4] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) +Query Stage #0 (1 -> 1): +SortExec: TopK(fetch=100), expr=[numwait@1 DESC,s_name@0 ASC NULLS LAST], preserve_partitioning=[false] + ProjectionExec: expr=[s_name@0 as s_name, count(*)@1 as numwait] + AggregateExec: mode=Single, gby=[s_name@0 as s_name], aggr=[count(*)] CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=4, input_partitioning=Hash([Column { 
name: "l_orderkey", index: 2 }], 4)) - -Query Stage #6 (4 -> 4): -ShuffleWriterExec(stage_id=6, output_partitioning=Hash([Column { name: "l_orderkey", index: 1 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@1)], projection=[s_name@1, l_orderkey@3, l_suppkey@4] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=5, input_partitioning=Hash([Column { name: "s_nationkey", index: 1 }], 4)) - -Query Stage #7 (4 -> 4): -ShuffleWriterExec(stage_id=7, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_orderkey, l_suppkey] - -Query Stage #8 (4 -> 4): -ShuffleWriterExec(stage_id=8, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) - ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_suppkey@1 as l_suppkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_receiptdate@3 > l_commitdate@2 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, 
home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 - -Query Stage #9 (4 -> 4): -ShuffleWriterExec(stage_id=9, output_partitioning=Hash([Column { name: "s_name", index: 0 }], 4)) - AggregateExec: mode=Partial, gby=[s_name@0 as s_name], aggr=[count(*)] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=LeftAnti, on=[(l_orderkey@1, l_orderkey@0)], filter=l_suppkey@1 != l_suppkey@0, projection=[s_name@0] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=LeftSemi, on=[(l_orderkey@1, l_orderkey@0)], filter=l_suppkey@1 != l_suppkey@0 - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=6, input_partitioning=Hash([Column { name: "l_orderkey", index: 1 }], 4)) + HashJoinExec: mode=CollectLeft, join_type=LeftAnti, on=[(l_orderkey@1, l_orderkey@0)], filter=l_suppkey@1 != l_suppkey@0, projection=[s_name@0] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=LeftSemi, on=[(l_orderkey@1, l_orderkey@0)], filter=l_suppkey@1 != l_suppkey@0 + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@1)], projection=[s_name@1, l_orderkey@3, l_suppkey@4] + ProjectionExec: expr=[n_nationkey@0 as n_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: n_name@1 = ARGENTINA + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = ARGENTINA, pruning_predicate=CASE WHEN n_name_null_count@2 = 
n_name_row_count@3 THEN false ELSE n_name_min@0 <= ARGENTINA AND ARGENTINA <= n_name_max@1 END, required_guarantees=[n_name in (ARGENTINA)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(o_orderkey@0, l_orderkey@2)], projection=[s_name@1, s_nationkey@2, l_orderkey@3, l_suppkey@4] + ProjectionExec: expr=[o_orderkey@0 as o_orderkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: o_orderstatus@1 = F + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54530383]]}, projection=[o_orderkey, o_orderstatus], predicate=o_orderstatus@2 = F, pruning_predicate=CASE WHEN o_orderstatus_null_count@2 = o_orderstatus_row_count@3 THEN false ELSE o_orderstatus_min@0 <= F AND F <= o_orderstatus_max@1 END, required_guarantees=[o_orderstatus in (F)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1)], projection=[s_name@1, s_nationkey@2, l_orderkey@3, l_suppkey@4] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_name, s_nationkey] + ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_suppkey@1 as l_suppkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: l_receiptdate@3 > l_commitdate@2 + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208317955]]}, projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208317955]]}, projection=[l_orderkey, l_suppkey] + ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_suppkey@1 as l_suppkey] CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=7, input_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 
4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=8, input_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) - -Query Stage #10 (4 -> 4): -ShuffleWriterExec(stage_id=10, output_partitioning=Hash([Column { name: "s_name", index: 0 }], 4)) - SortExec: TopK(fetch=100), expr=[numwait@1 DESC,s_name@0 ASC NULLS LAST], preserve_partitioning=[true] - ProjectionExec: expr=[s_name@0 as s_name, count(*)@1 as numwait] - AggregateExec: mode=FinalPartitioned, gby=[s_name@0 as s_name], aggr=[count(*)] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=9, input_partitioning=Hash([Column { name: "s_name", index: 0 }], 4)) - -Query Stage #11 (1 -> 1): -GlobalLimitExec: skip=0, fetch=100 - SortPreservingMergeExec: [numwait@1 DESC,s_name@0 ASC NULLS LAST], fetch=100 - ShuffleReaderExec(stage_id=10, input_partitioning=Hash([Column { name: "s_name", index: 0 }], 4)) + FilterExec: l_receiptdate@3 > l_commitdate@2 + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208317955]]}, projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 diff --git a/testdata/expected-plans/q22.txt b/testdata/expected-plans/q22.txt index 564ce9c..04c8a49 100644 --- a/testdata/expected-plans/q22.txt +++ b/testdata/expected-plans/q22.txt @@ -22,78 +22,41 @@ Sort: custsale.cntrycode ASC NULLS LAST DataFusion Physical Plan ======================== -SortPreservingMergeExec: [cntrycode@0 ASC NULLS LAST] - SortExec: expr=[cntrycode@0 ASC NULLS LAST], preserve_partitioning=[true] - ProjectionExec: expr=[cntrycode@0 as cntrycode, count(*)@1 as numcust, sum(custsale.c_acctbal)@2 as totacctbal] - AggregateExec: mode=FinalPartitioned, gby=[cntrycode@0 as cntrycode], aggr=[count(*), sum(custsale.c_acctbal)] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([cntrycode@0], 4), input_partitions=4 - AggregateExec: 
mode=Partial, gby=[cntrycode@0 as cntrycode], aggr=[count(*), sum(custsale.c_acctbal)] - ProjectionExec: expr=[substr(c_phone@1, 1, 2) as cntrycode, c_acctbal@2 as c_acctbal] - NestedLoopJoinExec: join_type=Inner, filter=CAST(c_acctbal@0 AS Decimal128(15, 6)) > avg(customer.c_acctbal)@1 - AggregateExec: mode=Final, gby=[], aggr=[avg(customer.c_acctbal)] - CoalescePartitionsExec - AggregateExec: mode=Partial, gby=[], aggr=[avg(customer.c_acctbal)] - ProjectionExec: expr=[c_acctbal@1 as c_acctbal] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: c_acctbal@1 > Some(0),11,2 AND Use substr(c_phone@0, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..3390424], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:3390424..6780848], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6780848..6782476, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..3388796], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:3388796..6779220]]}, projection=[c_phone, c_acctbal], predicate=c_acctbal@5 > Some(0),11,2 AND Use substr(c_phone@4, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) AND c_acctbal@5 > Some(0),11,2, pruning_predicate=CASE WHEN c_acctbal_null_count@1 = c_acctbal_row_count@2 THEN false ELSE c_acctbal_max@0 > Some(0),11,2 END AND CASE WHEN c_acctbal_null_count@1 = c_acctbal_row_count@2 THEN false ELSE c_acctbal_max@0 > Some(0),11,2 END, 
required_guarantees=[] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=LeftAnti, on=[(c_custkey@0, o_custkey@0)], projection=[c_phone@1, c_acctbal@2] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([c_custkey@0], 4), input_partitions=4 - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: Use substr(c_phone@1, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..3390424], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:3390424..6780848], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6780848..6782476, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..3388796], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:3388796..6779220]]}, projection=[c_custkey, c_phone, c_acctbal], predicate=Use substr(c_phone@4, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([o_custkey@0], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13634779], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13634779..27269558], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27269558..27275584, 
home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13628753], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13628753..27263530]]}, projection=[o_custkey] +SortExec: expr=[cntrycode@0 ASC NULLS LAST], preserve_partitioning=[false] + ProjectionExec: expr=[cntrycode@0 as cntrycode, count(*)@1 as numcust, sum(custsale.c_acctbal)@2 as totacctbal] + AggregateExec: mode=Single, gby=[cntrycode@0 as cntrycode], aggr=[count(*), sum(custsale.c_acctbal)] + ProjectionExec: expr=[substr(c_phone@1, 1, 2) as cntrycode, c_acctbal@2 as c_acctbal] + NestedLoopJoinExec: join_type=Inner, filter=CAST(c_acctbal@0 AS Decimal128(15, 6)) > avg(customer.c_acctbal)@1 + AggregateExec: mode=Single, gby=[], aggr=[avg(customer.c_acctbal)] + ProjectionExec: expr=[c_acctbal@1 as c_acctbal] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: c_acctbal@1 > Some(0),11,2 AND Use substr(c_phone@0, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13211178]]}, projection=[c_phone, c_acctbal], predicate=c_acctbal@5 > Some(0),11,2 AND Use substr(c_phone@4, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) AND c_acctbal@5 > Some(0),11,2, pruning_predicate=CASE WHEN c_acctbal_null_count@1 = c_acctbal_row_count@2 THEN false ELSE c_acctbal_max@0 > Some(0),11,2 END AND CASE WHEN c_acctbal_null_count@1 = c_acctbal_row_count@2 THEN false ELSE c_acctbal_max@0 > Some(0),11,2 END, required_guarantees=[] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: 
mode=CollectLeft, join_type=LeftAnti, on=[(c_custkey@0, o_custkey@0)], projection=[c_phone@1, c_acctbal@2] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: Use substr(c_phone@1, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13211178]]}, projection=[c_custkey, c_phone, c_acctbal], predicate=Use substr(c_phone@4, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54530383]]}, projection=[o_custkey] RaySQL Plan =========== -Query Stage #0 (4 -> 1): -ShuffleWriterExec(stage_id=0, output_partitioning=UnknownPartitioning(4)) - AggregateExec: mode=Partial, gby=[], aggr=[avg(customer.c_acctbal)] - ProjectionExec: expr=[c_acctbal@1 as c_acctbal] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: c_acctbal@1 > Some(0),11,2 AND Use substr(c_phone@0, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..3390424], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:3390424..6780848], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6780848..6782476, 
home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..3388796], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:3388796..6779220]]}, projection=[c_phone, c_acctbal], predicate=c_acctbal@5 > Some(0),11,2 AND Use substr(c_phone@4, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) AND c_acctbal@5 > Some(0),11,2, pruning_predicate=CASE WHEN c_acctbal_null_count@1 = c_acctbal_row_count@2 THEN false ELSE c_acctbal_max@0 > Some(0),11,2 END AND CASE WHEN c_acctbal_null_count@1 = c_acctbal_row_count@2 THEN false ELSE c_acctbal_max@0 > Some(0),11,2 END, required_guarantees=[] - -Query Stage #1 (4 -> 4): -ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: Use substr(c_phone@1, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..3390424], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:3390424..6780848], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6780848..6782476, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..3388796], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:3388796..6779220]]}, projection=[c_custkey, c_phone, c_acctbal], predicate=Use substr(c_phone@4, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") 
}, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) - -Query Stage #2 (4 -> 4): -ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "o_custkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13634779], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13634779..27269558], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27269558..27275584, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13628753], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13628753..27263530]]}, projection=[o_custkey] - -Query Stage #3 (4 -> 4): -ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "cntrycode", index: 0 }], 4)) - AggregateExec: mode=Partial, gby=[cntrycode@0 as cntrycode], aggr=[count(*), sum(custsale.c_acctbal)] - ProjectionExec: expr=[substr(c_phone@1, 1, 2) as cntrycode, c_acctbal@2 as c_acctbal] - NestedLoopJoinExec: join_type=Inner, filter=CAST(c_acctbal@0 AS Decimal128(15, 6)) > avg(customer.c_acctbal)@1 - AggregateExec: mode=Final, gby=[], aggr=[avg(customer.c_acctbal)] - CoalescePartitionsExec - ShuffleReaderExec(stage_id=0, input_partitioning=UnknownPartitioning(4)) - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=LeftAnti, on=[(c_custkey@0, o_custkey@0)], projection=[c_phone@1, c_acctbal@2] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "o_custkey", index: 0 }], 4)) - -Query Stage #4 (4 -> 4): -ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: 
"cntrycode", index: 0 }], 4)) - SortExec: expr=[cntrycode@0 ASC NULLS LAST], preserve_partitioning=[true] - ProjectionExec: expr=[cntrycode@0 as cntrycode, count(*)@1 as numcust, sum(custsale.c_acctbal)@2 as totacctbal] - AggregateExec: mode=FinalPartitioned, gby=[cntrycode@0 as cntrycode], aggr=[count(*), sum(custsale.c_acctbal)] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "cntrycode", index: 0 }], 4)) - -Query Stage #5 (4 -> 1): -SortPreservingMergeExec: [cntrycode@0 ASC NULLS LAST] - ShuffleReaderExec(stage_id=4, input_partitioning=Hash([Column { name: "cntrycode", index: 0 }], 4)) +Query Stage #0 (1 -> 1): +SortExec: expr=[cntrycode@0 ASC NULLS LAST], preserve_partitioning=[false] + ProjectionExec: expr=[cntrycode@0 as cntrycode, count(*)@1 as numcust, sum(custsale.c_acctbal)@2 as totacctbal] + AggregateExec: mode=Single, gby=[cntrycode@0 as cntrycode], aggr=[count(*), sum(custsale.c_acctbal)] + ProjectionExec: expr=[substr(c_phone@1, 1, 2) as cntrycode, c_acctbal@2 as c_acctbal] + NestedLoopJoinExec: join_type=Inner, filter=CAST(c_acctbal@0 AS Decimal128(15, 6)) > avg(customer.c_acctbal)@1 + AggregateExec: mode=Single, gby=[], aggr=[avg(customer.c_acctbal)] + ProjectionExec: expr=[c_acctbal@1 as c_acctbal] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: c_acctbal@1 > Some(0),11,2 AND Use substr(c_phone@0, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13211178]]}, projection=[c_phone, c_acctbal], predicate=c_acctbal@5 > Some(0),11,2 AND Use substr(c_phone@4, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: 
Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) AND c_acctbal@5 > Some(0),11,2, pruning_predicate=CASE WHEN c_acctbal_null_count@1 = c_acctbal_row_count@2 THEN false ELSE c_acctbal_max@0 > Some(0),11,2 END AND CASE WHEN c_acctbal_null_count@1 = c_acctbal_row_count@2 THEN false ELSE c_acctbal_max@0 > Some(0),11,2 END, required_guarantees=[] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=LeftAnti, on=[(c_custkey@0, o_custkey@0)], projection=[c_phone@1, c_acctbal@2] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: Use substr(c_phone@1, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13211178]]}, projection=[c_custkey, c_phone, c_acctbal], predicate=Use substr(c_phone@4, 1, 2) IN (SET) ([Literal { value: Utf8("24") }, Literal { value: Utf8("34") }, Literal { value: Utf8("16") }, Literal { value: Utf8("30") }, Literal { value: Utf8("33") }, Literal { value: Utf8("14") }, Literal { value: Utf8("13") }]) + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54530383]]}, projection=[o_custkey] diff --git a/testdata/expected-plans/q3.txt b/testdata/expected-plans/q3.txt index 790fc2d..84f91b2 100644 --- a/testdata/expected-plans/q3.txt +++ b/testdata/expected-plans/q3.txt @@ -21,90 +21,45 @@ Limit: skip=0, fetch=10 DataFusion Physical Plan ======================== -GlobalLimitExec: skip=0, fetch=10 - SortPreservingMergeExec: [revenue@1 DESC,o_orderdate@2 ASC NULLS LAST], fetch=10 - SortExec: TopK(fetch=10), expr=[revenue@1 DESC,o_orderdate@2 ASC NULLS LAST], preserve_partitioning=[true] - ProjectionExec: 
expr=[l_orderkey@0 as l_orderkey, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@3 as revenue, o_orderdate@1 as o_orderdate, o_shippriority@2 as o_shippriority] - AggregateExec: mode=FinalPartitioned, gby=[l_orderkey@0 as l_orderkey, o_orderdate@1 as o_orderdate, o_shippriority@2 as o_shippriority], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] +SortExec: TopK(fetch=10), expr=[revenue@1 DESC,o_orderdate@2 ASC NULLS LAST], preserve_partitioning=[false] + ProjectionExec: expr=[l_orderkey@0 as l_orderkey, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@3 as revenue, o_orderdate@1 as o_orderdate, o_shippriority@2 as o_shippriority] + AggregateExec: mode=Single, gby=[l_orderkey@2 as l_orderkey, o_orderdate@0 as o_orderdate, o_shippriority@1 as o_shippriority], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_orderdate@1, o_shippriority@2, l_orderkey@3, l_extendedprice@4, l_discount@5] CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([l_orderkey@0, o_orderdate@1, o_shippriority@2], 4), input_partitions=4 - AggregateExec: mode=Partial, gby=[l_orderkey@2 as l_orderkey, o_orderdate@0 as o_orderdate, o_shippriority@1 as o_shippriority], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[o_orderkey@1, o_orderdate@3, o_shippriority@4] + ProjectionExec: expr=[c_custkey@0 as c_custkey] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_orderdate@1, o_shippriority@2, l_orderkey@3, l_extendedprice@4, l_discount@5] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([o_orderkey@0], 4), 
input_partitions=4 - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[o_orderkey@1, o_orderdate@3, o_shippriority@4] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([c_custkey@0], 4), input_partitions=4 - ProjectionExec: expr=[c_custkey@0 as c_custkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: c_mktsegment@1 = BUILDING - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..3390424], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:3390424..6780848], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6780848..6782476, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..3388796], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:3388796..6779220]]}, projection=[c_custkey, c_mktsegment], predicate=c_mktsegment@6 = BUILDING, pruning_predicate=CASE WHEN c_mktsegment_null_count@2 = c_mktsegment_row_count@3 THEN false ELSE c_mktsegment_min@0 <= BUILDING AND BUILDING <= c_mktsegment_max@1 END, required_guarantees=[c_mktsegment in (BUILDING)] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([o_custkey@1], 4), input_partitions=4 - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: o_orderdate@2 < 1995-03-15 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13634779], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13634779..27269558], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27269558..27275584, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13628753], 
[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13628753..27263530]]}, projection=[o_orderkey, o_custkey, o_orderdate, o_shippriority], predicate=o_orderdate@4 < 1995-03-15, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@0 < 1995-03-15 END, required_guarantees=[] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4 - ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_shipdate@3 > 1995-03-15 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_orderkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 > 1995-03-15, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 > 1995-03-15 END, required_guarantees=[] + FilterExec: c_mktsegment@1 = BUILDING + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13211178]]}, projection=[c_custkey, c_mktsegment], predicate=c_mktsegment@6 = BUILDING, pruning_predicate=CASE WHEN c_mktsegment_null_count@2 = c_mktsegment_row_count@3 THEN false ELSE c_mktsegment_min@0 <= BUILDING AND BUILDING <= c_mktsegment_max@1 END, required_guarantees=[c_mktsegment in (BUILDING)] + CoalesceBatchesExec: target_batch_size=8192 + 
FilterExec: o_orderdate@2 < 1995-03-15 + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54530383]]}, projection=[o_orderkey, o_custkey, o_orderdate, o_shippriority], predicate=o_orderdate@4 < 1995-03-15, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@0 < 1995-03-15 END, required_guarantees=[] + ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: l_shipdate@3 > 1995-03-15 + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208317955]]}, projection=[l_orderkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 > 1995-03-15, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 > 1995-03-15 END, required_guarantees=[] RaySQL Plan =========== -Query Stage #0 (4 -> 4): -ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) - ProjectionExec: expr=[c_custkey@0 as c_custkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: c_mktsegment@1 = BUILDING - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..3390424], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:3390424..6780848], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6780848..6782476, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..3388796], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:3388796..6779220]]}, projection=[c_custkey, c_mktsegment], predicate=c_mktsegment@6 = BUILDING, pruning_predicate=CASE WHEN c_mktsegment_null_count@2 = c_mktsegment_row_count@3 THEN false 
ELSE c_mktsegment_min@0 <= BUILDING AND BUILDING <= c_mktsegment_max@1 END, required_guarantees=[c_mktsegment in (BUILDING)] - -Query Stage #1 (4 -> 4): -ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "o_custkey", index: 1 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: o_orderdate@2 < 1995-03-15 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13634779], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13634779..27269558], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27269558..27275584, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13628753], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13628753..27263530]]}, projection=[o_orderkey, o_custkey, o_orderdate, o_shippriority], predicate=o_orderdate@4 < 1995-03-15, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@0 < 1995-03-15 END, required_guarantees=[] - -Query Stage #2 (4 -> 4): -ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[o_orderkey@1, o_orderdate@3, o_shippriority@4] +Query Stage #0 (1 -> 1): +SortExec: TopK(fetch=10), expr=[revenue@1 DESC,o_orderdate@2 ASC NULLS LAST], preserve_partitioning=[false] + ProjectionExec: expr=[l_orderkey@0 as l_orderkey, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@3 as revenue, o_orderdate@1 as o_orderdate, o_shippriority@2 as o_shippriority] + AggregateExec: mode=Single, gby=[l_orderkey@2 as l_orderkey, o_orderdate@0 as o_orderdate, o_shippriority@1 as o_shippriority], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] 
CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "o_custkey", index: 1 }], 4)) - -Query Stage #3 (4 -> 4): -ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) - ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_shipdate@3 > 1995-03-15 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_orderkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 > 1995-03-15, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 > 1995-03-15 END, required_guarantees=[] - -Query Stage #4 (4 -> 4): -ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }, Column { name: "o_orderdate", index: 1 }, Column { name: "o_shippriority", index: 2 }], 4)) - AggregateExec: mode=Partial, gby=[l_orderkey@2 as l_orderkey, o_orderdate@0 as o_orderdate, o_shippriority@1 as o_shippriority], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@0, l_orderkey@0)], 
projection=[o_orderdate@1, o_shippriority@2, l_orderkey@3, l_extendedprice@4, l_discount@5] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) - -Query Stage #5 (4 -> 4): -ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }, Column { name: "o_orderdate", index: 2 }, Column { name: "o_shippriority", index: 3 }], 4)) - SortExec: TopK(fetch=10), expr=[revenue@1 DESC,o_orderdate@2 ASC NULLS LAST], preserve_partitioning=[true] - ProjectionExec: expr=[l_orderkey@0 as l_orderkey, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@3 as revenue, o_orderdate@1 as o_orderdate, o_shippriority@2 as o_shippriority] - AggregateExec: mode=FinalPartitioned, gby=[l_orderkey@0 as l_orderkey, o_orderdate@1 as o_orderdate, o_shippriority@2 as o_shippriority], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=4, input_partitioning=Hash([Column { name: "l_orderkey", index: 0 }, Column { name: "o_orderdate", index: 1 }, Column { name: "o_shippriority", index: 2 }], 4)) - -Query Stage #6 (1 -> 1): -GlobalLimitExec: skip=0, fetch=10 - SortPreservingMergeExec: [revenue@1 DESC,o_orderdate@2 ASC NULLS LAST], fetch=10 - ShuffleReaderExec(stage_id=5, input_partitioning=Hash([Column { name: "l_orderkey", index: 0 }, Column { name: "o_orderdate", index: 2 }, Column { name: "o_shippriority", index: 3 }], 4)) + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_orderdate@1, o_shippriority@2, l_orderkey@3, l_extendedprice@4, l_discount@5] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(c_custkey@0, 
o_custkey@1)], projection=[o_orderkey@1, o_orderdate@3, o_shippriority@4] + ProjectionExec: expr=[c_custkey@0 as c_custkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: c_mktsegment@1 = BUILDING + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13211178]]}, projection=[c_custkey, c_mktsegment], predicate=c_mktsegment@6 = BUILDING, pruning_predicate=CASE WHEN c_mktsegment_null_count@2 = c_mktsegment_row_count@3 THEN false ELSE c_mktsegment_min@0 <= BUILDING AND BUILDING <= c_mktsegment_max@1 END, required_guarantees=[c_mktsegment in (BUILDING)] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: o_orderdate@2 < 1995-03-15 + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54530383]]}, projection=[o_orderkey, o_custkey, o_orderdate, o_shippriority], predicate=o_orderdate@4 < 1995-03-15, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@0 < 1995-03-15 END, required_guarantees=[] + ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: l_shipdate@3 > 1995-03-15 + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208317955]]}, projection=[l_orderkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 > 1995-03-15, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 > 1995-03-15 END, required_guarantees=[] diff --git a/testdata/expected-plans/q4.txt b/testdata/expected-plans/q4.txt index c830fbd..8dc100a 100644 --- a/testdata/expected-plans/q4.txt +++ b/testdata/expected-plans/q4.txt @@ -17,64 +17,35 @@ Sort: orders.o_orderpriority ASC NULLS LAST DataFusion Physical Plan ======================== -SortPreservingMergeExec: 
[o_orderpriority@0 ASC NULLS LAST] - SortExec: expr=[o_orderpriority@0 ASC NULLS LAST], preserve_partitioning=[true] - ProjectionExec: expr=[o_orderpriority@0 as o_orderpriority, count(*)@1 as order_count] - AggregateExec: mode=FinalPartitioned, gby=[o_orderpriority@0 as o_orderpriority], aggr=[count(*)] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([o_orderpriority@0], 4), input_partitions=4 - AggregateExec: mode=Partial, gby=[o_orderpriority@0 as o_orderpriority], aggr=[count(*)] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=LeftSemi, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_orderpriority@1] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([o_orderkey@0], 4), input_partitions=4 - ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_orderpriority@2 as o_orderpriority] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: o_orderdate@1 >= 1995-04-01 AND o_orderdate@1 < 1995-07-01 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13634779], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13634779..27269558], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27269558..27275584, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13628753], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13628753..27263530]]}, projection=[o_orderkey, o_orderdate, o_orderpriority], predicate=o_orderdate@4 >= 1995-04-01 AND o_orderdate@4 < 1995-07-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1995-04-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1995-07-01 END, required_guarantees=[] - CoalesceBatchesExec: 
target_batch_size=8192 - RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4 - ProjectionExec: expr=[l_orderkey@0 as l_orderkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_receiptdate@2 > l_commitdate@1 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_orderkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 +SortExec: expr=[o_orderpriority@0 ASC NULLS LAST], preserve_partitioning=[false] + ProjectionExec: expr=[o_orderpriority@0 as o_orderpriority, count(*)@1 as order_count] + AggregateExec: mode=Single, gby=[o_orderpriority@0 as o_orderpriority], aggr=[count(*)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=LeftSemi, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_orderpriority@1] + ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_orderpriority@2 as o_orderpriority] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: o_orderdate@1 >= 1995-04-01 AND o_orderdate@1 < 1995-07-01 + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54530383]]}, projection=[o_orderkey, o_orderdate, o_orderpriority], predicate=o_orderdate@4 >= 1995-04-01 AND o_orderdate@4 < 1995-07-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1995-04-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE 
o_orderdate_min@3 < 1995-07-01 END, required_guarantees=[] + ProjectionExec: expr=[l_orderkey@0 as l_orderkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: l_receiptdate@2 > l_commitdate@1 + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208317955]]}, projection=[l_orderkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 RaySQL Plan =========== -Query Stage #0 (4 -> 4): -ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) - ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_orderpriority@2 as o_orderpriority] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: o_orderdate@1 >= 1995-04-01 AND o_orderdate@1 < 1995-07-01 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13634779], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13634779..27269558], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27269558..27275584, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13628753], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13628753..27263530]]}, projection=[o_orderkey, o_orderdate, o_orderpriority], predicate=o_orderdate@4 >= 1995-04-01 AND o_orderdate@4 < 1995-07-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1995-04-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1995-07-01 END, required_guarantees=[] - -Query Stage #1 (4 -> 4): -ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) - ProjectionExec: expr=[l_orderkey@0 as l_orderkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_receiptdate@2 > 
l_commitdate@1 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_orderkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 - -Query Stage #2 (4 -> 4): -ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "o_orderpriority", index: 0 }], 4)) - AggregateExec: mode=Partial, gby=[o_orderpriority@0 as o_orderpriority], aggr=[count(*)] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=LeftSemi, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_orderpriority@1] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) - -Query Stage #3 (4 -> 4): -ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "o_orderpriority", index: 0 }], 4)) - SortExec: expr=[o_orderpriority@0 ASC NULLS LAST], preserve_partitioning=[true] - ProjectionExec: expr=[o_orderpriority@0 as o_orderpriority, count(*)@1 as order_count] - AggregateExec: mode=FinalPartitioned, gby=[o_orderpriority@0 as o_orderpriority], aggr=[count(*)] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "o_orderpriority", index: 0 }], 4)) - -Query Stage #4 (4 -> 1): -SortPreservingMergeExec: [o_orderpriority@0 ASC NULLS 
LAST] - ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "o_orderpriority", index: 0 }], 4)) +Query Stage #0 (1 -> 1): +SortExec: expr=[o_orderpriority@0 ASC NULLS LAST], preserve_partitioning=[false] + ProjectionExec: expr=[o_orderpriority@0 as o_orderpriority, count(*)@1 as order_count] + AggregateExec: mode=Single, gby=[o_orderpriority@0 as o_orderpriority], aggr=[count(*)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=LeftSemi, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_orderpriority@1] + ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_orderpriority@2 as o_orderpriority] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: o_orderdate@1 >= 1995-04-01 AND o_orderdate@1 < 1995-07-01 + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54530383]]}, projection=[o_orderkey, o_orderdate, o_orderpriority], predicate=o_orderdate@4 >= 1995-04-01 AND o_orderdate@4 < 1995-07-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1995-04-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1995-07-01 END, required_guarantees=[] + ProjectionExec: expr=[l_orderkey@0 as l_orderkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: l_receiptdate@2 > l_commitdate@1 + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208317955]]}, projection=[l_orderkey, l_commitdate, l_receiptdate], predicate=l_receiptdate@12 > l_commitdate@11 diff --git a/testdata/expected-plans/q5.txt b/testdata/expected-plans/q5.txt index 9773172..ec776aa 100644 --- a/testdata/expected-plans/q5.txt +++ b/testdata/expected-plans/q5.txt @@ -28,150 +28,63 @@ Sort: revenue DESC NULLS FIRST DataFusion Physical Plan ======================== -SortPreservingMergeExec: [revenue@1 DESC] - 
SortExec: expr=[revenue@1 DESC], preserve_partitioning=[true] - ProjectionExec: expr=[n_name@0 as n_name, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@1 as revenue] - AggregateExec: mode=FinalPartitioned, gby=[n_name@0 as n_name], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([n_name@0], 4), input_partitions=4 - AggregateExec: mode=Partial, gby=[n_name@2 as n_name], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(r_regionkey@0, n_regionkey@3)], projection=[l_extendedprice@1, l_discount@2, n_name@3] +SortExec: expr=[revenue@1 DESC], preserve_partitioning=[false] + ProjectionExec: expr=[n_name@0 as n_name, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@1 as revenue] + AggregateExec: mode=Single, gby=[n_name@2 as n_name], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(r_regionkey@0, n_regionkey@3)], projection=[l_extendedprice@1, l_discount@2, n_name@3] + ProjectionExec: expr=[r_regionkey@0 as r_regionkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: r_name@1 = AFRICA + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = AFRICA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= AFRICA AND AFRICA <= r_name_max@1 END, required_guarantees=[r_name in (AFRICA)] + ProjectionExec: expr=[l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, n_name@0 as n_name, n_regionkey@1 as n_regionkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, 
s_nationkey@2)], projection=[n_name@1, n_regionkey@2, l_extendedprice@3, l_discount@4] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name, n_regionkey] + ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@0 as s_nationkey] CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([r_regionkey@0], 4), input_partitions=4 - RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 - ProjectionExec: expr=[r_regionkey@0 as r_regionkey] + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1), (s_nationkey@1, c_nationkey@0)], projection=[s_nationkey@1, l_extendedprice@4, l_discount@5] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(o_orderkey@1, l_orderkey@0)], projection=[c_nationkey@0, l_suppkey@3, l_extendedprice@4, l_discount@5] CoalesceBatchesExec: target_batch_size=8192 - FilterExec: r_name@1 = AFRICA - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet/part1.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = AFRICA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= AFRICA AND AFRICA <= r_name_max@1 END, required_guarantees=[r_name in (AFRICA)] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([n_regionkey@3], 4), input_partitions=4 - ProjectionExec: expr=[l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, n_name@0 as n_name, n_regionkey@1 as n_regionkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], 
projection=[n_name@1, n_regionkey@2, l_extendedprice@3, l_discount@4] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=1 - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name, n_regionkey] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([s_nationkey@2], 4), input_partitions=4 - ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@0 as s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1), (s_nationkey@1, c_nationkey@0)], projection=[s_nationkey@1, l_extendedprice@4, l_discount@5] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([s_suppkey@0, s_nationkey@1], 4), input_partitions=4 - RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([l_suppkey@1, c_nationkey@0], 4), input_partitions=4 - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@1, l_orderkey@0)], projection=[c_nationkey@0, l_suppkey@3, l_extendedprice@4, l_discount@5] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([o_orderkey@1], 4), input_partitions=4 - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_nationkey@1, o_orderkey@2] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: 
partitioning=Hash([c_custkey@0], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..3390424], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:3390424..6780848], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6780848..6782476, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..3388796], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:3388796..6779220]]}, projection=[c_custkey, c_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([o_custkey@1], 4), input_partitions=4 - ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: o_orderdate@2 >= 1994-01-01 AND o_orderdate@2 < 1995-01-01 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13634779], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13634779..27269558], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27269558..27275584, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13628753], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13628753..27263530]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1994-01-01 AND o_orderdate@4 < 1995-01-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1994-01-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1995-01-01 END, required_guarantees=[] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([l_orderkey@0], 4), 
input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount] + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_nationkey@1, o_orderkey@2] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13211178]]}, projection=[c_custkey, c_nationkey] + ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: o_orderdate@2 >= 1994-01-01 AND o_orderdate@2 < 1995-01-01 + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54530383]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1994-01-01 AND o_orderdate@4 < 1995-01-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1994-01-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1995-01-01 END, required_guarantees=[] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208317955]]}, projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount] RaySQL Plan =========== -Query Stage #0 (1 -> 4): -ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "r_regionkey", index: 0 }], 4)) - ProjectionExec: 
expr=[r_regionkey@0 as r_regionkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: r_name@1 = AFRICA - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet/part1.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = AFRICA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= AFRICA AND AFRICA <= r_name_max@1 END, required_guarantees=[r_name in (AFRICA)] - -Query Stage #1 (1 -> 4): -ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name, n_regionkey] - -Query Stage #2 (2 -> 4): -ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }, Column { name: "s_nationkey", index: 1 }], 4)) - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] - -Query Stage #3 (4 -> 4): -ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..3390424], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:3390424..6780848], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6780848..6782476, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..3388796], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:3388796..6779220]]}, projection=[c_custkey, c_nationkey] - -Query Stage #4 (4 -> 4): -ShuffleWriterExec(stage_id=4, 
output_partitioning=Hash([Column { name: "o_custkey", index: 1 }], 4)) - ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: o_orderdate@2 >= 1994-01-01 AND o_orderdate@2 < 1995-01-01 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13634779], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13634779..27269558], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27269558..27275584, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13628753], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13628753..27263530]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1994-01-01 AND o_orderdate@4 < 1995-01-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1994-01-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1995-01-01 END, required_guarantees=[] - -Query Stage #5 (4 -> 4): -ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "o_orderkey", index: 1 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_nationkey@1, o_orderkey@2] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) +Query Stage #0 (1 -> 1): +SortExec: expr=[revenue@1 DESC], preserve_partitioning=[false] + ProjectionExec: expr=[n_name@0 as n_name, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@1 as revenue] + AggregateExec: mode=Single, gby=[n_name@2 as n_name], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] 
CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=4, input_partitioning=Hash([Column { name: "o_custkey", index: 1 }], 4)) - -Query Stage #6 (4 -> 4): -ShuffleWriterExec(stage_id=6, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount] - -Query Stage #7 (4 -> 4): -ShuffleWriterExec(stage_id=7, output_partitioning=Hash([Column { name: "l_suppkey", index: 1 }, Column { name: "c_nationkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@1, l_orderkey@0)], projection=[c_nationkey@0, l_suppkey@3, l_extendedprice@4, l_discount@5] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=5, input_partitioning=Hash([Column { name: "o_orderkey", index: 1 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=6, input_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) - -Query Stage #8 (4 -> 4): -ShuffleWriterExec(stage_id=8, output_partitioning=Hash([Column { name: "s_nationkey", index: 2 }], 4)) - ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@0 as s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1), (s_nationkey@1, c_nationkey@0)], 
projection=[s_nationkey@1, l_extendedprice@4, l_discount@5] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "s_suppkey", index: 0 }, Column { name: "s_nationkey", index: 1 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=7, input_partitioning=Hash([Column { name: "l_suppkey", index: 1 }, Column { name: "c_nationkey", index: 0 }], 4)) - -Query Stage #9 (4 -> 4): -ShuffleWriterExec(stage_id=9, output_partitioning=Hash([Column { name: "n_regionkey", index: 3 }], 4)) - ProjectionExec: expr=[l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, n_name@0 as n_name, n_regionkey@1 as n_regionkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[n_name@1, n_regionkey@2, l_extendedprice@3, l_discount@4] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=8, input_partitioning=Hash([Column { name: "s_nationkey", index: 2 }], 4)) - -Query Stage #10 (4 -> 4): -ShuffleWriterExec(stage_id=10, output_partitioning=Hash([Column { name: "n_name", index: 0 }], 4)) - AggregateExec: mode=Partial, gby=[n_name@2 as n_name], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(r_regionkey@0, n_regionkey@3)], projection=[l_extendedprice@1, l_discount@2, n_name@3] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "r_regionkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=9, input_partitioning=Hash([Column { name: "n_regionkey", index: 3 }], 4)) - -Query Stage #11 (4 -> 4): 
-ShuffleWriterExec(stage_id=11, output_partitioning=Hash([Column { name: "n_name", index: 0 }], 4)) - SortExec: expr=[revenue@1 DESC], preserve_partitioning=[true] - ProjectionExec: expr=[n_name@0 as n_name, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@1 as revenue] - AggregateExec: mode=FinalPartitioned, gby=[n_name@0 as n_name], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=10, input_partitioning=Hash([Column { name: "n_name", index: 0 }], 4)) - -Query Stage #12 (4 -> 1): -SortPreservingMergeExec: [revenue@1 DESC] - ShuffleReaderExec(stage_id=11, input_partitioning=Hash([Column { name: "n_name", index: 0 }], 4)) + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(r_regionkey@0, n_regionkey@3)], projection=[l_extendedprice@1, l_discount@2, n_name@3] + ProjectionExec: expr=[r_regionkey@0 as r_regionkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: r_name@1 = AFRICA + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = AFRICA, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= AFRICA AND AFRICA <= r_name_max@1 END, required_guarantees=[r_name in (AFRICA)] + ProjectionExec: expr=[l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, n_name@0 as n_name, n_regionkey@1 as n_regionkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[n_name@1, n_regionkey@2, l_extendedprice@3, l_discount@4] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name, n_regionkey] + ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@0 as s_nationkey] + 
CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1), (s_nationkey@1, c_nationkey@0)], projection=[s_nationkey@1, l_extendedprice@4, l_discount@5] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(o_orderkey@1, l_orderkey@0)], projection=[c_nationkey@0, l_suppkey@3, l_extendedprice@4, l_discount@5] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_nationkey@1, o_orderkey@2] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13211178]]}, projection=[c_custkey, c_nationkey] + ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: o_orderdate@2 >= 1994-01-01 AND o_orderdate@2 < 1995-01-01 + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54530383]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1994-01-01 AND o_orderdate@4 < 1995-01-01, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1994-01-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 < 1995-01-01 END, required_guarantees=[] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208317955]]}, projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount] diff --git a/testdata/expected-plans/q6.txt b/testdata/expected-plans/q6.txt index 6f9143d..d505b02 100644 --- a/testdata/expected-plans/q6.txt +++ b/testdata/expected-plans/q6.txt @@ -11,28 +11,20 
@@ DataFusion Physical Plan ======================== ProjectionExec: expr=[sum(lineitem.l_extendedprice * lineitem.l_discount)@0 as revenue] - AggregateExec: mode=Final, gby=[], aggr=[sum(lineitem.l_extendedprice * lineitem.l_discount)] - CoalescePartitionsExec - AggregateExec: mode=Partial, gby=[], aggr=[sum(lineitem.l_extendedprice * lineitem.l_discount)] - ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_shipdate@3 >= 1994-01-01 AND l_shipdate@3 < 1995-01-01 AND l_discount@2 >= Some(3),11,2 AND l_discount@2 <= Some(5),11,2 AND l_quantity@0 < Some(2400),11,2 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_quantity, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1994-01-01 AND l_shipdate@10 < 1995-01-01 AND l_discount@6 >= Some(3),11,2 AND l_discount@6 <= Some(5),11,2 AND l_quantity@4 < Some(2400),11,2, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1994-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1995-01-01 END AND CASE WHEN l_discount_null_count@5 = l_discount_row_count@6 THEN false ELSE l_discount_max@4 >= Some(3),11,2 END AND CASE WHEN l_discount_null_count@5 = l_discount_row_count@6 THEN false ELSE l_discount_min@7 <= Some(5),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false 
ELSE l_quantity_min@8 < Some(2400),11,2 END, required_guarantees=[] + AggregateExec: mode=Single, gby=[], aggr=[sum(lineitem.l_extendedprice * lineitem.l_discount)] + ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: l_shipdate@3 >= 1994-01-01 AND l_shipdate@3 < 1995-01-01 AND l_discount@2 >= Some(3),11,2 AND l_discount@2 <= Some(5),11,2 AND l_quantity@0 < Some(2400),11,2 + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208317955]]}, projection=[l_quantity, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1994-01-01 AND l_shipdate@10 < 1995-01-01 AND l_discount@6 >= Some(3),11,2 AND l_discount@6 <= Some(5),11,2 AND l_quantity@4 < Some(2400),11,2, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1994-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1995-01-01 END AND CASE WHEN l_discount_null_count@5 = l_discount_row_count@6 THEN false ELSE l_discount_max@4 >= Some(3),11,2 END AND CASE WHEN l_discount_null_count@5 = l_discount_row_count@6 THEN false ELSE l_discount_min@7 <= Some(5),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@8 < Some(2400),11,2 END, required_guarantees=[] RaySQL Plan =========== -Query Stage #0 (4 -> 1): -ShuffleWriterExec(stage_id=0, output_partitioning=UnknownPartitioning(4)) - AggregateExec: mode=Partial, gby=[], aggr=[sum(lineitem.l_extendedprice * lineitem.l_discount)] +Query Stage #0 (1 -> 1): +ProjectionExec: expr=[sum(lineitem.l_extendedprice * lineitem.l_discount)@0 as revenue] + AggregateExec: mode=Single, gby=[], aggr=[sum(lineitem.l_extendedprice * lineitem.l_discount)] ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] CoalesceBatchesExec: 
target_batch_size=8192 FilterExec: l_shipdate@3 >= 1994-01-01 AND l_shipdate@3 < 1995-01-01 AND l_discount@2 >= Some(3),11,2 AND l_discount@2 <= Some(5),11,2 AND l_quantity@0 < Some(2400),11,2 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_quantity, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1994-01-01 AND l_shipdate@10 < 1995-01-01 AND l_discount@6 >= Some(3),11,2 AND l_discount@6 <= Some(5),11,2 AND l_quantity@4 < Some(2400),11,2, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1994-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1995-01-01 END AND CASE WHEN l_discount_null_count@5 = l_discount_row_count@6 THEN false ELSE l_discount_max@4 >= Some(3),11,2 END AND CASE WHEN l_discount_null_count@5 = l_discount_row_count@6 THEN false ELSE l_discount_min@7 <= Some(5),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@8 < Some(2400),11,2 END, required_guarantees=[] - -Query Stage #1 (1 -> 1): -ProjectionExec: expr=[sum(lineitem.l_extendedprice * lineitem.l_discount)@0 as revenue] - AggregateExec: mode=Final, gby=[], aggr=[sum(lineitem.l_extendedprice * lineitem.l_discount)] - CoalescePartitionsExec - ShuffleReaderExec(stage_id=0, input_partitioning=UnknownPartitioning(4)) + ParquetExec: file_groups={1 group: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208317955]]}, projection=[l_quantity, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1994-01-01 AND l_shipdate@10 < 1995-01-01 AND l_discount@6 >= Some(3),11,2 AND l_discount@6 <= Some(5),11,2 AND l_quantity@4 < Some(2400),11,2, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1994-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 < 1995-01-01 END AND CASE WHEN l_discount_null_count@5 = l_discount_row_count@6 THEN false ELSE l_discount_max@4 >= Some(3),11,2 END AND CASE WHEN l_discount_null_count@5 = l_discount_row_count@6 THEN false ELSE l_discount_min@7 <= Some(5),11,2 END AND CASE WHEN l_quantity_null_count@9 = l_quantity_row_count@10 THEN false ELSE l_quantity_min@8 < Some(2400),11,2 END, required_guarantees=[] diff --git a/testdata/expected-plans/q7.txt b/testdata/expected-plans/q7.txt index 5253594..74d297b 100644 --- a/testdata/expected-plans/q7.txt +++ b/testdata/expected-plans/q7.txt @@ -30,153 +30,65 @@ Sort: shipping.supp_nation ASC NULLS LAST, shipping.cust_nation ASC NULLS LAST, DataFusion Physical Plan ======================== -SortPreservingMergeExec: [supp_nation@0 ASC NULLS LAST,cust_nation@1 ASC NULLS LAST,l_year@2 ASC NULLS LAST] - SortExec: expr=[supp_nation@0 ASC NULLS LAST,cust_nation@1 ASC NULLS LAST,l_year@2 ASC NULLS LAST], preserve_partitioning=[true] - ProjectionExec: expr=[supp_nation@0 as supp_nation, cust_nation@1 as cust_nation, l_year@2 as l_year, sum(shipping.volume)@3 as revenue] - AggregateExec: mode=FinalPartitioned, gby=[supp_nation@0 as supp_nation, cust_nation@1 as cust_nation, l_year@2 as l_year], aggr=[sum(shipping.volume)] +SortExec: expr=[supp_nation@0 ASC NULLS LAST,cust_nation@1 ASC NULLS LAST,l_year@2 ASC NULLS LAST], preserve_partitioning=[false] + ProjectionExec: expr=[supp_nation@0 as supp_nation, 
cust_nation@1 as cust_nation, l_year@2 as l_year, sum(shipping.volume)@3 as revenue] + AggregateExec: mode=Single, gby=[supp_nation@0 as supp_nation, cust_nation@1 as cust_nation, l_year@2 as l_year], aggr=[sum(shipping.volume)] + ProjectionExec: expr=[n_name@4 as supp_nation, n_name@0 as cust_nation, date_part(YEAR, l_shipdate@3) as l_year, l_extendedprice@1 * (Some(1),20,0 - l_discount@2) as volume] CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([supp_nation@0, cust_nation@1, l_year@2], 4), input_partitions=4 - AggregateExec: mode=Partial, gby=[supp_nation@0 as supp_nation, cust_nation@1 as cust_nation, l_year@2 as l_year], aggr=[sum(shipping.volume)] - ProjectionExec: expr=[n_name@4 as supp_nation, n_name@0 as cust_nation, date_part(YEAR, l_shipdate@3) as l_year, l_extendedprice@1 * (Some(1),20,0 - l_discount@2) as volume] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, c_nationkey@3)], filter=n_name@0 = GERMANY AND n_name@1 = IRAQ OR n_name@0 = IRAQ AND n_name@1 = GERMANY, projection=[n_name@1, l_extendedprice@2, l_discount@3, l_shipdate@4, n_name@6] + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, c_nationkey@3)], filter=n_name@0 = GERMANY AND n_name@1 = IRAQ OR n_name@0 = IRAQ AND n_name@1 = GERMANY, projection=[n_name@1, l_extendedprice@2, l_discount@3, l_shipdate@4, n_name@6] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: n_name@1 = IRAQ OR n_name@1 = GERMANY + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = IRAQ OR n_name@1 = GERMANY, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= IRAQ AND IRAQ <= n_name_max@1 END OR CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= GERMANY AND GERMANY <= n_name_max@1 END, 
required_guarantees=[n_name in (GERMANY, IRAQ)] + ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, l_shipdate@3 as l_shipdate, c_nationkey@4 as c_nationkey, n_name@0 as n_name] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@0)], projection=[n_name@1, l_extendedprice@3, l_discount@4, l_shipdate@5, c_nationkey@6] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: n_name@1 = GERMANY OR n_name@1 = IRAQ + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = GERMANY OR n_name@1 = IRAQ, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= GERMANY AND GERMANY <= n_name_max@1 END OR CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= IRAQ AND IRAQ <= n_name_max@1 END, required_guarantees=[n_name in (IRAQ, GERMANY)] + ProjectionExec: expr=[s_nationkey@1 as s_nationkey, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, l_shipdate@4 as l_shipdate, c_nationkey@0 as c_nationkey] CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=4 - RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: n_name@1 = IRAQ OR n_name@1 = GERMANY - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = IRAQ OR n_name@1 = GERMANY, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= IRAQ AND IRAQ <= n_name_max@1 END OR CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= GERMANY AND GERMANY <= n_name_max@1 END, required_guarantees=[n_name 
in (IRAQ, GERMANY)] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([c_nationkey@3], 4), input_partitions=4 - ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, l_shipdate@3 as l_shipdate, c_nationkey@4 as c_nationkey, n_name@0 as n_name] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@0)], projection=[n_name@1, l_extendedprice@3, l_discount@4, l_shipdate@5, c_nationkey@6] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=4 - RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: n_name@1 = GERMANY OR n_name@1 = IRAQ - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = GERMANY OR n_name@1 = IRAQ, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= GERMANY AND GERMANY <= n_name_max@1 END OR CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= IRAQ AND IRAQ <= n_name_max@1 END, required_guarantees=[n_name in (GERMANY, IRAQ)] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([s_nationkey@0], 4), input_partitions=4 - ProjectionExec: expr=[s_nationkey@1 as s_nationkey, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, l_shipdate@4 as l_shipdate, c_nationkey@0 as c_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@4)], projection=[c_nationkey@1, s_nationkey@2, l_extendedprice@3, l_discount@4, l_shipdate@5] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([c_custkey@0], 4), input_partitions=4 - ParquetExec: 
file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..3390424], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:3390424..6780848], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6780848..6782476, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..3388796], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:3388796..6779220]]}, projection=[c_custkey, c_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([o_custkey@4], 4), input_partitions=4 - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(l_orderkey@1, o_orderkey@0)], projection=[s_nationkey@0, l_extendedprice@2, l_discount@3, l_shipdate@4, o_custkey@6] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([l_orderkey@1], 4), input_partitions=4 - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1)], projection=[s_nationkey@1, l_orderkey@2, l_extendedprice@4, l_discount@5, l_shipdate@6] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=4 - RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([l_suppkey@1], 4), input_partitions=4 - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_shipdate@4 >= 1995-01-01 AND l_shipdate@4 <= 1996-12-31 - ParquetExec: file_groups={4 groups: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1995-01-01 AND l_shipdate@10 <= 1996-12-31, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1995-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 <= 1996-12-31 END, required_guarantees=[] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([o_orderkey@0], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13634779], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13634779..27269558], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27269558..27275584, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13628753], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13628753..27263530]]}, projection=[o_orderkey, o_custkey] + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(c_custkey@0, o_custkey@4)], projection=[c_nationkey@1, s_nationkey@2, l_extendedprice@3, l_discount@4, l_shipdate@5] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13211178]]}, projection=[c_custkey, c_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + 
HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(l_orderkey@1, o_orderkey@0)], projection=[s_nationkey@0, l_extendedprice@2, l_discount@3, l_shipdate@4, o_custkey@6] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1)], projection=[s_nationkey@1, l_orderkey@2, l_extendedprice@4, l_discount@5, l_shipdate@6] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: l_shipdate@4 >= 1995-01-01 AND l_shipdate@4 <= 1996-12-31 + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208317955]]}, projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1995-01-01 AND l_shipdate@10 <= 1996-12-31, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1995-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 <= 1996-12-31 END, required_guarantees=[] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54530383]]}, projection=[o_orderkey, o_custkey] RaySQL Plan =========== -Query Stage #0 (1 -> 4): -ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: n_name@1 = IRAQ OR n_name@1 = GERMANY - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = IRAQ OR n_name@1 = GERMANY, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= IRAQ AND IRAQ <= n_name_max@1 END OR CASE WHEN n_name_null_count@2 = n_name_row_count@3 
THEN false ELSE n_name_min@0 <= GERMANY AND GERMANY <= n_name_max@1 END, required_guarantees=[n_name in (IRAQ, GERMANY)] - -Query Stage #1 (1 -> 4): -ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: n_name@1 = GERMANY OR n_name@1 = IRAQ - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = GERMANY OR n_name@1 = IRAQ, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= GERMANY AND GERMANY <= n_name_max@1 END OR CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= IRAQ AND IRAQ <= n_name_max@1 END, required_guarantees=[n_name in (GERMANY, IRAQ)] - -Query Stage #2 (4 -> 4): -ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..3390424], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:3390424..6780848], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6780848..6782476, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..3388796], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:3388796..6779220]]}, projection=[c_custkey, c_nationkey] - -Query Stage #3 (2 -> 4): -ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] - -Query Stage #4 (4 -> 4): 
-ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "l_suppkey", index: 1 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: l_shipdate@4 >= 1995-01-01 AND l_shipdate@4 <= 1996-12-31 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1995-01-01 AND l_shipdate@10 <= 1996-12-31, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1995-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 <= 1996-12-31 END, required_guarantees=[] - -Query Stage #5 (4 -> 4): -ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "l_orderkey", index: 1 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1)], projection=[s_nationkey@1, l_orderkey@2, l_extendedprice@4, l_discount@5, l_shipdate@6] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=4, input_partitioning=Hash([Column { name: "l_suppkey", index: 1 }], 4)) - -Query Stage #6 (4 -> 4): -ShuffleWriterExec(stage_id=6, output_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13634779], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13634779..27269558], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27269558..27275584, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13628753], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13628753..27263530]]}, projection=[o_orderkey, o_custkey] - -Query Stage #7 (4 -> 4): -ShuffleWriterExec(stage_id=7, output_partitioning=Hash([Column { name: "o_custkey", index: 4 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(l_orderkey@1, o_orderkey@0)], projection=[s_nationkey@0, l_extendedprice@2, l_discount@3, l_shipdate@4, o_custkey@6] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=5, input_partitioning=Hash([Column { name: "l_orderkey", index: 1 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=6, input_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) - -Query Stage #8 (4 -> 4): -ShuffleWriterExec(stage_id=8, output_partitioning=Hash([Column { name: "s_nationkey", index: 0 }], 4)) - ProjectionExec: expr=[s_nationkey@1 as s_nationkey, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, l_shipdate@4 as l_shipdate, c_nationkey@0 as c_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@4)], projection=[c_nationkey@1, s_nationkey@2, l_extendedprice@3, l_discount@4, l_shipdate@5] +Query Stage #0 (1 -> 1): +SortExec: expr=[supp_nation@0 ASC NULLS LAST,cust_nation@1 ASC NULLS LAST,l_year@2 ASC NULLS LAST], preserve_partitioning=[false] + ProjectionExec: expr=[supp_nation@0 as supp_nation, cust_nation@1 as cust_nation, l_year@2 as l_year, sum(shipping.volume)@3 as 
revenue] + AggregateExec: mode=Single, gby=[supp_nation@0 as supp_nation, cust_nation@1 as cust_nation, l_year@2 as l_year], aggr=[sum(shipping.volume)] + ProjectionExec: expr=[n_name@4 as supp_nation, n_name@0 as cust_nation, date_part(YEAR, l_shipdate@3) as l_year, l_extendedprice@1 * (Some(1),20,0 - l_discount@2) as volume] CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=7, input_partitioning=Hash([Column { name: "o_custkey", index: 4 }], 4)) - -Query Stage #9 (4 -> 4): -ShuffleWriterExec(stage_id=9, output_partitioning=Hash([Column { name: "c_nationkey", index: 3 }], 4)) - ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, l_shipdate@3 as l_shipdate, c_nationkey@4 as c_nationkey, n_name@0 as n_name] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@0)], projection=[n_name@1, l_extendedprice@3, l_discount@4, l_shipdate@5, c_nationkey@6] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=8, input_partitioning=Hash([Column { name: "s_nationkey", index: 0 }], 4)) - -Query Stage #10 (4 -> 4): -ShuffleWriterExec(stage_id=10, output_partitioning=Hash([Column { name: "supp_nation", index: 0 }, Column { name: "cust_nation", index: 1 }, Column { name: "l_year", index: 2 }], 4)) - AggregateExec: mode=Partial, gby=[supp_nation@0 as supp_nation, cust_nation@1 as cust_nation, l_year@2 as l_year], aggr=[sum(shipping.volume)] - ProjectionExec: expr=[n_name@4 as supp_nation, n_name@0 as cust_nation, date_part(YEAR, l_shipdate@3) as l_year, l_extendedprice@1 * (Some(1),20,0 - l_discount@2) as volume] - CoalesceBatchesExec: 
target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, c_nationkey@3)], filter=n_name@0 = GERMANY AND n_name@1 = IRAQ OR n_name@0 = IRAQ AND n_name@1 = GERMANY, projection=[n_name@1, l_extendedprice@2, l_discount@3, l_shipdate@4, n_name@6] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=9, input_partitioning=Hash([Column { name: "c_nationkey", index: 3 }], 4)) - -Query Stage #11 (4 -> 4): -ShuffleWriterExec(stage_id=11, output_partitioning=Hash([Column { name: "supp_nation", index: 0 }, Column { name: "cust_nation", index: 1 }, Column { name: "l_year", index: 2 }], 4)) - SortExec: expr=[supp_nation@0 ASC NULLS LAST,cust_nation@1 ASC NULLS LAST,l_year@2 ASC NULLS LAST], preserve_partitioning=[true] - ProjectionExec: expr=[supp_nation@0 as supp_nation, cust_nation@1 as cust_nation, l_year@2 as l_year, sum(shipping.volume)@3 as revenue] - AggregateExec: mode=FinalPartitioned, gby=[supp_nation@0 as supp_nation, cust_nation@1 as cust_nation, l_year@2 as l_year], aggr=[sum(shipping.volume)] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=10, input_partitioning=Hash([Column { name: "supp_nation", index: 0 }, Column { name: "cust_nation", index: 1 }, Column { name: "l_year", index: 2 }], 4)) - -Query Stage #12 (4 -> 1): -SortPreservingMergeExec: [supp_nation@0 ASC NULLS LAST,cust_nation@1 ASC NULLS LAST,l_year@2 ASC NULLS LAST] - ShuffleReaderExec(stage_id=11, input_partitioning=Hash([Column { name: "supp_nation", index: 0 }, Column { name: "cust_nation", index: 1 }, Column { name: "l_year", index: 2 }], 4)) + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, c_nationkey@3)], filter=n_name@0 = GERMANY AND n_name@1 = IRAQ OR n_name@0 = IRAQ AND n_name@1 = GERMANY, projection=[n_name@1, l_extendedprice@2, 
l_discount@3, l_shipdate@4, n_name@6] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: n_name@1 = IRAQ OR n_name@1 = GERMANY + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = IRAQ OR n_name@1 = GERMANY, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= IRAQ AND IRAQ <= n_name_max@1 END OR CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= GERMANY AND GERMANY <= n_name_max@1 END, required_guarantees=[n_name in (GERMANY, IRAQ)] + ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, l_shipdate@3 as l_shipdate, c_nationkey@4 as c_nationkey, n_name@0 as n_name] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@0)], projection=[n_name@1, l_extendedprice@3, l_discount@4, l_shipdate@5, c_nationkey@6] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: n_name@1 = GERMANY OR n_name@1 = IRAQ + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name], predicate=n_name@1 = GERMANY OR n_name@1 = IRAQ, pruning_predicate=CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= GERMANY AND GERMANY <= n_name_max@1 END OR CASE WHEN n_name_null_count@2 = n_name_row_count@3 THEN false ELSE n_name_min@0 <= IRAQ AND IRAQ <= n_name_max@1 END, required_guarantees=[n_name in (IRAQ, GERMANY)] + ProjectionExec: expr=[s_nationkey@1 as s_nationkey, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, l_shipdate@4 as l_shipdate, c_nationkey@0 as c_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(c_custkey@0, o_custkey@4)], projection=[c_nationkey@1, s_nationkey@2, l_extendedprice@3, 
l_discount@4, l_shipdate@5] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13211178]]}, projection=[c_custkey, c_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(l_orderkey@1, o_orderkey@0)], projection=[s_nationkey@0, l_extendedprice@2, l_discount@3, l_shipdate@4, o_custkey@6] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1)], projection=[s_nationkey@1, l_orderkey@2, l_extendedprice@4, l_discount@5, l_shipdate@6] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: l_shipdate@4 >= 1995-01-01 AND l_shipdate@4 <= 1996-12-31 + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208317955]]}, projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount, l_shipdate], predicate=l_shipdate@10 >= 1995-01-01 AND l_shipdate@10 <= 1996-12-31, pruning_predicate=CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_max@0 >= 1995-01-01 END AND CASE WHEN l_shipdate_null_count@1 = l_shipdate_row_count@2 THEN false ELSE l_shipdate_min@3 <= 1996-12-31 END, required_guarantees=[] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54530383]]}, projection=[o_orderkey, o_custkey] diff --git a/testdata/expected-plans/q8.txt b/testdata/expected-plans/q8.txt index c346f6c..6ab620c 100644 --- a/testdata/expected-plans/q8.txt +++ b/testdata/expected-plans/q8.txt @@ -38,203 +38,87 @@ Sort: all_nations.o_year ASC NULLS LAST DataFusion Physical Plan ======================== -SortPreservingMergeExec: [o_year@0 ASC NULLS LAST] - SortExec: expr=[o_year@0 ASC NULLS LAST], 
preserve_partitioning=[true] - ProjectionExec: expr=[o_year@0 as o_year, sum(CASE WHEN all_nations.nation = Utf8("IRAQ") THEN all_nations.volume ELSE Int64(0) END)@1 / sum(all_nations.volume)@2 as mkt_share] - AggregateExec: mode=FinalPartitioned, gby=[o_year@0 as o_year], aggr=[sum(CASE WHEN all_nations.nation = Utf8("IRAQ") THEN all_nations.volume ELSE Int64(0) END), sum(all_nations.volume)] +SortExec: expr=[o_year@0 ASC NULLS LAST], preserve_partitioning=[false] + ProjectionExec: expr=[o_year@0 as o_year, sum(CASE WHEN all_nations.nation = Utf8("IRAQ") THEN all_nations.volume ELSE Int64(0) END)@1 / sum(all_nations.volume)@2 as mkt_share] + AggregateExec: mode=Single, gby=[o_year@0 as o_year], aggr=[sum(CASE WHEN all_nations.nation = Utf8("IRAQ") THEN all_nations.volume ELSE Int64(0) END), sum(all_nations.volume)] + ProjectionExec: expr=[date_part(YEAR, o_orderdate@2) as o_year, l_extendedprice@0 * (Some(1),20,0 - l_discount@1) as volume, n_name@3 as nation] CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([o_year@0], 4), input_partitions=4 - AggregateExec: mode=Partial, gby=[o_year@0 as o_year], aggr=[sum(CASE WHEN all_nations.nation = Utf8("IRAQ") THEN all_nations.volume ELSE Int64(0) END), sum(all_nations.volume)] - ProjectionExec: expr=[date_part(YEAR, o_orderdate@2) as o_year, l_extendedprice@0 * (Some(1),20,0 - l_discount@1) as volume, n_name@3 as nation] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(r_regionkey@0, n_regionkey@3)], projection=[l_extendedprice@1, l_discount@2, o_orderdate@3, n_name@5] + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(r_regionkey@0, n_regionkey@3)], projection=[l_extendedprice@1, l_discount@2, o_orderdate@3, n_name@5] + ProjectionExec: expr=[r_regionkey@0 as r_regionkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: r_name@1 = MIDDLE EAST + ParquetExec: file_groups={1 group: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = MIDDLE EAST, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= MIDDLE EAST AND MIDDLE EAST <= r_name_max@1 END, required_guarantees=[r_name in (MIDDLE EAST)] + ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, o_orderdate@3 as o_orderdate, n_regionkey@4 as n_regionkey, n_name@0 as n_name] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[n_name@1, l_extendedprice@2, l_discount@3, o_orderdate@5, n_regionkey@6] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name] + ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@3 as s_nationkey, o_orderdate@4 as o_orderdate, n_regionkey@0 as n_regionkey] CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([r_regionkey@0], 4), input_partitions=4 - RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 - ProjectionExec: expr=[r_regionkey@0 as r_regionkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: r_name@1 = MIDDLE EAST - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet/part1.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = MIDDLE EAST, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= MIDDLE EAST AND MIDDLE EAST <= r_name_max@1 END, required_guarantees=[r_name in (MIDDLE EAST)] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([n_regionkey@3], 4), input_partitions=4 - ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, o_orderdate@3 as 
o_orderdate, n_regionkey@4 as n_regionkey, n_name@0 as n_name] + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, c_nationkey@4)], projection=[n_regionkey@1, l_extendedprice@2, l_discount@3, s_nationkey@4, o_orderdate@5] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_regionkey] + ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@3 as s_nationkey, o_orderdate@4 as o_orderdate, c_nationkey@0 as c_nationkey] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[n_name@1, l_extendedprice@2, l_discount@3, o_orderdate@5, n_regionkey@6] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=1 - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([s_nationkey@2], 4), input_partitions=4 - ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@3 as s_nationkey, o_orderdate@4 as o_orderdate, n_regionkey@0 as n_regionkey] + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(c_custkey@0, o_custkey@3)], projection=[c_nationkey@1, l_extendedprice@2, l_discount@3, s_nationkey@4, o_orderdate@6] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13211178]]}, projection=[c_custkey, c_nationkey] + ProjectionExec: expr=[l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@4 as s_nationkey, o_custkey@0 as o_custkey, o_orderdate@1 as o_orderdate] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(o_orderkey@0, 
l_orderkey@0)], projection=[o_custkey@1, o_orderdate@2, l_extendedprice@4, l_discount@5, s_nationkey@6] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, c_nationkey@4)], projection=[n_regionkey@1, l_extendedprice@2, l_discount@3, s_nationkey@4, o_orderdate@5] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=1 - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_regionkey] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([c_nationkey@4], 4), input_partitions=4 - ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@3 as s_nationkey, o_orderdate@4 as o_orderdate, c_nationkey@0 as c_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@3)], projection=[c_nationkey@1, l_extendedprice@2, l_discount@3, s_nationkey@4, o_orderdate@6] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([c_custkey@0], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..3390424], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:3390424..6780848], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6780848..6782476, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..3388796], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:3388796..6779220]]}, projection=[c_custkey, c_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([o_custkey@3], 4), input_partitions=4 - ProjectionExec: expr=[l_extendedprice@2 as 
l_extendedprice, l_discount@3 as l_discount, s_nationkey@4 as s_nationkey, o_custkey@0 as o_custkey, o_orderdate@1 as o_orderdate] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_custkey@1, o_orderdate@2, l_extendedprice@4, l_discount@5, s_nationkey@6] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([o_orderkey@0], 4), input_partitions=4 - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: o_orderdate@2 >= 1995-01-01 AND o_orderdate@2 <= 1996-12-31 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13634779], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13634779..27269558], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27269558..27275584, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13628753], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13628753..27263530]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1995-01-01 AND o_orderdate@4 <= 1996-12-31, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1995-01-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 <= 1996-12-31 END, required_guarantees=[] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4 - ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@0 as s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1)], projection=[s_nationkey@1, l_orderkey@2, l_extendedprice@4, l_discount@5] 
- CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=4 - RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([l_suppkey@1], 4), input_partitions=4 - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, l_partkey@1)], projection=[l_orderkey@1, l_suppkey@3, l_extendedprice@4, l_discount@5] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([p_partkey@0], 4), input_partitions=4 - ProjectionExec: expr=[p_partkey@0 as p_partkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: p_type@1 = LARGE PLATED STEEL - RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_type], predicate=p_type@4 = LARGE PLATED STEEL, pruning_predicate=CASE WHEN p_type_null_count@2 = p_type_row_count@3 THEN false ELSE p_type_min@0 <= LARGE PLATED STEEL AND LARGE PLATED STEEL <= p_type_max@1 END, required_guarantees=[p_type in (LARGE PLATED STEEL)] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([l_partkey@1], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, 
home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_orderkey, l_partkey, l_suppkey, l_extendedprice, l_discount] + FilterExec: o_orderdate@2 >= 1995-01-01 AND o_orderdate@2 <= 1996-12-31 + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54530383]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1995-01-01 AND o_orderdate@4 <= 1996-12-31, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1995-01-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 <= 1996-12-31 END, required_guarantees=[] + ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@0 as s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1)], projection=[s_nationkey@1, l_orderkey@2, l_extendedprice@4, l_discount@5] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, l_partkey@1)], projection=[l_orderkey@1, l_suppkey@3, l_extendedprice@4, l_discount@5] + ProjectionExec: expr=[p_partkey@0 as p_partkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: p_type@1 = LARGE PLATED STEEL + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_type], predicate=p_type@4 = LARGE PLATED STEEL, 
pruning_predicate=CASE WHEN p_type_null_count@2 = p_type_row_count@3 THEN false ELSE p_type_min@0 <= LARGE PLATED STEEL AND LARGE PLATED STEEL <= p_type_max@1 END, required_guarantees=[p_type in (LARGE PLATED STEEL)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208317955]]}, projection=[l_orderkey, l_partkey, l_suppkey, l_extendedprice, l_discount] RaySQL Plan =========== -Query Stage #0 (1 -> 4): -ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "r_regionkey", index: 0 }], 4)) - ProjectionExec: expr=[r_regionkey@0 as r_regionkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: r_name@1 = MIDDLE EAST - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet/part1.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = MIDDLE EAST, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= MIDDLE EAST AND MIDDLE EAST <= r_name_max@1 END, required_guarantees=[r_name in (MIDDLE EAST)] - -Query Stage #1 (1 -> 4): -ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name] - -Query Stage #2 (1 -> 4): -ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_regionkey] - -Query Stage #3 (4 -> 4): -ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:0..3390424], 
[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:3390424..6780848], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part1.parquet:6780848..6782476, home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:0..3388796], [home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet/part2.parquet:3388796..6779220]]}, projection=[c_custkey, c_nationkey] - -Query Stage #4 (4 -> 4): -ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: o_orderdate@2 >= 1995-01-01 AND o_orderdate@2 <= 1996-12-31 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13634779], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13634779..27269558], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27269558..27275584, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13628753], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13628753..27263530]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1995-01-01 AND o_orderdate@4 <= 1996-12-31, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1995-01-01 END AND CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 <= 1996-12-31 END, required_guarantees=[] - -Query Stage #5 (2 -> 4): -ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, 
s_nationkey] - -Query Stage #6 (2 -> 4): -ShuffleWriterExec(stage_id=6, output_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) - ProjectionExec: expr=[p_partkey@0 as p_partkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: p_type@1 = LARGE PLATED STEEL - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_type], predicate=p_type@4 = LARGE PLATED STEEL, pruning_predicate=CASE WHEN p_type_null_count@2 = p_type_row_count@3 THEN false ELSE p_type_min@0 <= LARGE PLATED STEEL AND LARGE PLATED STEEL <= p_type_max@1 END, required_guarantees=[p_type in (LARGE PLATED STEEL)] - -Query Stage #7 (4 -> 4): -ShuffleWriterExec(stage_id=7, output_partitioning=Hash([Column { name: "l_partkey", index: 1 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_orderkey, l_partkey, l_suppkey, l_extendedprice, l_discount] - -Query Stage #8 (4 -> 4): -ShuffleWriterExec(stage_id=8, output_partitioning=Hash([Column { name: "l_suppkey", index: 1 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, l_partkey@1)], projection=[l_orderkey@1, l_suppkey@3, l_extendedprice@4, l_discount@5] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=6, input_partitioning=Hash([Column { name: "p_partkey", index: 
0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=7, input_partitioning=Hash([Column { name: "l_partkey", index: 1 }], 4)) - -Query Stage #9 (4 -> 4): -ShuffleWriterExec(stage_id=9, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) - ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@0 as s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1)], projection=[s_nationkey@1, l_orderkey@2, l_extendedprice@4, l_discount@5] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=5, input_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=8, input_partitioning=Hash([Column { name: "l_suppkey", index: 1 }], 4)) - -Query Stage #10 (4 -> 4): -ShuffleWriterExec(stage_id=10, output_partitioning=Hash([Column { name: "o_custkey", index: 3 }], 4)) - ProjectionExec: expr=[l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@4 as s_nationkey, o_custkey@0 as o_custkey, o_orderdate@1 as o_orderdate] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_custkey@1, o_orderdate@2, l_extendedprice@4, l_discount@5, s_nationkey@6] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=4, input_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=9, input_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) - -Query Stage #11 (4 -> 4): -ShuffleWriterExec(stage_id=11, output_partitioning=Hash([Column { name: "c_nationkey", index: 4 }], 4)) - ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, 
s_nationkey@3 as s_nationkey, o_orderdate@4 as o_orderdate, c_nationkey@0 as c_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@3)], projection=[c_nationkey@1, l_extendedprice@2, l_discount@3, s_nationkey@4, o_orderdate@6] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=10, input_partitioning=Hash([Column { name: "o_custkey", index: 3 }], 4)) - -Query Stage #12 (4 -> 4): -ShuffleWriterExec(stage_id=12, output_partitioning=Hash([Column { name: "s_nationkey", index: 2 }], 4)) - ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@3 as s_nationkey, o_orderdate@4 as o_orderdate, n_regionkey@0 as n_regionkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, c_nationkey@4)], projection=[n_regionkey@1, l_extendedprice@2, l_discount@3, s_nationkey@4, o_orderdate@5] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=11, input_partitioning=Hash([Column { name: "c_nationkey", index: 4 }], 4)) - -Query Stage #13 (4 -> 4): -ShuffleWriterExec(stage_id=13, output_partitioning=Hash([Column { name: "n_regionkey", index: 3 }], 4)) - ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, o_orderdate@3 as o_orderdate, n_regionkey@4 as n_regionkey, n_name@0 as n_name] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[n_name@1, l_extendedprice@2, l_discount@3, o_orderdate@5, n_regionkey@6] - CoalesceBatchesExec: 
target_batch_size=8192 - ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) +Query Stage #0 (1 -> 1): +SortExec: expr=[o_year@0 ASC NULLS LAST], preserve_partitioning=[false] + ProjectionExec: expr=[o_year@0 as o_year, sum(CASE WHEN all_nations.nation = Utf8("IRAQ") THEN all_nations.volume ELSE Int64(0) END)@1 / sum(all_nations.volume)@2 as mkt_share] + AggregateExec: mode=Single, gby=[o_year@0 as o_year], aggr=[sum(CASE WHEN all_nations.nation = Utf8("IRAQ") THEN all_nations.volume ELSE Int64(0) END), sum(all_nations.volume)] + ProjectionExec: expr=[date_part(YEAR, o_orderdate@2) as o_year, l_extendedprice@0 * (Some(1),20,0 - l_discount@1) as volume, n_name@3 as nation] CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=12, input_partitioning=Hash([Column { name: "s_nationkey", index: 2 }], 4)) - -Query Stage #14 (4 -> 4): -ShuffleWriterExec(stage_id=14, output_partitioning=Hash([Column { name: "o_year", index: 0 }], 4)) - AggregateExec: mode=Partial, gby=[o_year@0 as o_year], aggr=[sum(CASE WHEN all_nations.nation = Utf8("IRAQ") THEN all_nations.volume ELSE Int64(0) END), sum(all_nations.volume)] - ProjectionExec: expr=[date_part(YEAR, o_orderdate@2) as o_year, l_extendedprice@0 * (Some(1),20,0 - l_discount@1) as volume, n_name@3 as nation] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(r_regionkey@0, n_regionkey@3)], projection=[l_extendedprice@1, l_discount@2, o_orderdate@3, n_name@5] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "r_regionkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=13, input_partitioning=Hash([Column { name: "n_regionkey", index: 3 }], 4)) - -Query Stage #15 (4 -> 4): -ShuffleWriterExec(stage_id=15, output_partitioning=Hash([Column { name: "o_year", index: 0 }], 4)) - SortExec: expr=[o_year@0 
ASC NULLS LAST], preserve_partitioning=[true] - ProjectionExec: expr=[o_year@0 as o_year, sum(CASE WHEN all_nations.nation = Utf8("IRAQ") THEN all_nations.volume ELSE Int64(0) END)@1 / sum(all_nations.volume)@2 as mkt_share] - AggregateExec: mode=FinalPartitioned, gby=[o_year@0 as o_year], aggr=[sum(CASE WHEN all_nations.nation = Utf8("IRAQ") THEN all_nations.volume ELSE Int64(0) END), sum(all_nations.volume)] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=14, input_partitioning=Hash([Column { name: "o_year", index: 0 }], 4)) - -Query Stage #16 (4 -> 1): -SortPreservingMergeExec: [o_year@0 ASC NULLS LAST] - ShuffleReaderExec(stage_id=15, input_partitioning=Hash([Column { name: "o_year", index: 0 }], 4)) + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(r_regionkey@0, n_regionkey@3)], projection=[l_extendedprice@1, l_discount@2, o_orderdate@3, n_name@5] + ProjectionExec: expr=[r_regionkey@0 as r_regionkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: r_name@1 = MIDDLE EAST + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/region.parquet]]}, projection=[r_regionkey, r_name], predicate=r_name@1 = MIDDLE EAST, pruning_predicate=CASE WHEN r_name_null_count@2 = r_name_row_count@3 THEN false ELSE r_name_min@0 <= MIDDLE EAST AND MIDDLE EAST <= r_name_max@1 END, required_guarantees=[r_name in (MIDDLE EAST)] + ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, o_orderdate@3 as o_orderdate, n_regionkey@4 as n_regionkey, n_name@0 as n_name] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@2)], projection=[n_name@1, l_extendedprice@2, l_discount@3, o_orderdate@5, n_regionkey@6] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name] + ProjectionExec: expr=[l_extendedprice@1 as 
l_extendedprice, l_discount@2 as l_discount, s_nationkey@3 as s_nationkey, o_orderdate@4 as o_orderdate, n_regionkey@0 as n_regionkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, c_nationkey@4)], projection=[n_regionkey@1, l_extendedprice@2, l_discount@3, s_nationkey@4, o_orderdate@5] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_regionkey] + ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@3 as s_nationkey, o_orderdate@4 as o_orderdate, c_nationkey@0 as c_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(c_custkey@0, o_custkey@3)], projection=[c_nationkey@1, l_extendedprice@2, l_discount@3, s_nationkey@4, o_orderdate@6] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/customer.parquet:0..13211178]]}, projection=[c_custkey, c_nationkey] + ProjectionExec: expr=[l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@4 as s_nationkey, o_custkey@0 as o_custkey, o_orderdate@1 as o_orderdate] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_custkey@1, o_orderdate@2, l_extendedprice@4, l_discount@5, s_nationkey@6] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: o_orderdate@2 >= 1995-01-01 AND o_orderdate@2 <= 1996-12-31 + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54530383]]}, projection=[o_orderkey, o_custkey, o_orderdate], predicate=o_orderdate@4 >= 1995-01-01 AND o_orderdate@4 <= 1996-12-31, pruning_predicate=CASE WHEN o_orderdate_null_count@1 = o_orderdate_row_count@2 THEN false ELSE o_orderdate_max@0 >= 1995-01-01 END AND CASE WHEN o_orderdate_null_count@1 = 
o_orderdate_row_count@2 THEN false ELSE o_orderdate_min@3 <= 1996-12-31 END, required_guarantees=[] + ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@0 as s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, l_suppkey@1)], projection=[s_nationkey@1, l_orderkey@2, l_extendedprice@4, l_discount@5] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, l_partkey@1)], projection=[l_orderkey@1, l_suppkey@3, l_extendedprice@4, l_discount@5] + ProjectionExec: expr=[p_partkey@0 as p_partkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: p_type@1 = LARGE PLATED STEEL + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_type], predicate=p_type@4 = LARGE PLATED STEEL, pruning_predicate=CASE WHEN p_type_null_count@2 = p_type_row_count@3 THEN false ELSE p_type_min@0 <= LARGE PLATED STEEL AND LARGE PLATED STEEL <= p_type_max@1 END, required_guarantees=[p_type in (LARGE PLATED STEEL)] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208317955]]}, projection=[l_orderkey, l_partkey, l_suppkey, l_extendedprice, l_discount] diff --git a/testdata/expected-plans/q9.txt b/testdata/expected-plans/q9.txt index 5a38167..f305294 100644 --- a/testdata/expected-plans/q9.txt +++ b/testdata/expected-plans/q9.txt @@ -27,148 +27,61 @@ Sort: profit.nation ASC NULLS LAST, profit.o_year DESC NULLS FIRST DataFusion Physical Plan ======================== -SortPreservingMergeExec: [nation@0 ASC NULLS LAST,o_year@1 DESC] - SortExec: expr=[nation@0 ASC NULLS LAST,o_year@1 DESC], 
preserve_partitioning=[true] - ProjectionExec: expr=[nation@0 as nation, o_year@1 as o_year, sum(profit.amount)@2 as sum_profit] - AggregateExec: mode=FinalPartitioned, gby=[nation@0 as nation, o_year@1 as o_year], aggr=[sum(profit.amount)] +SortExec: expr=[nation@0 ASC NULLS LAST,o_year@1 DESC], preserve_partitioning=[false] + ProjectionExec: expr=[nation@0 as nation, o_year@1 as o_year, sum(profit.amount)@2 as sum_profit] + AggregateExec: mode=Single, gby=[nation@0 as nation, o_year@1 as o_year], aggr=[sum(profit.amount)] + ProjectionExec: expr=[n_name@0 as nation, date_part(YEAR, o_orderdate@5) as o_year, l_extendedprice@2 * (Some(1),20,0 - l_discount@3) - ps_supplycost@4 * l_quantity@1 as amount] CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([nation@0, o_year@1], 4), input_partitions=4 - AggregateExec: mode=Partial, gby=[nation@0 as nation, o_year@1 as o_year], aggr=[sum(profit.amount)] - ProjectionExec: expr=[n_name@0 as nation, date_part(YEAR, o_orderdate@5) as o_year, l_extendedprice@2 * (Some(1),20,0 - l_discount@3) - ps_supplycost@4 * l_quantity@1 as amount] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@3)], projection=[n_name@1, l_quantity@2, l_extendedprice@3, l_discount@4, ps_supplycost@6, o_orderdate@7] + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@3)], projection=[n_name@1, l_quantity@2, l_extendedprice@3, l_discount@4, ps_supplycost@6, o_orderdate@7] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name] + ProjectionExec: expr=[l_quantity@1 as l_quantity, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@4 as s_nationkey, ps_supplycost@5 as ps_supplycost, o_orderdate@0 as o_orderdate] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, 
on=[(o_orderkey@0, l_orderkey@0)], projection=[o_orderdate@1, l_quantity@3, l_extendedprice@4, l_discount@5, s_nationkey@6, ps_supplycost@7] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54530383]]}, projection=[o_orderkey, o_orderdate] + ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_quantity@2 as l_quantity, l_extendedprice@3 as l_extendedprice, l_discount@4 as l_discount, s_nationkey@5 as s_nationkey, ps_supplycost@0 as ps_supplycost] CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=1 - ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([s_nationkey@3], 4), input_partitions=4 - ProjectionExec: expr=[l_quantity@1 as l_quantity, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@4 as s_nationkey, ps_supplycost@5 as ps_supplycost, o_orderdate@0 as o_orderdate] + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(ps_suppkey@1, l_suppkey@2), (ps_partkey@0, l_partkey@1)], projection=[ps_supplycost@2, l_orderkey@3, l_quantity@6, l_extendedprice@7, l_discount@8, s_nationkey@9] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet:0..41091907]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] + ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_partkey@2 as l_partkey, l_suppkey@3 as l_suppkey, l_quantity@4 as l_quantity, l_extendedprice@5 as l_extendedprice, l_discount@6 as l_discount, s_nationkey@0 as s_nationkey] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_orderdate@1, l_quantity@3, l_extendedprice@4, l_discount@5, s_nationkey@6, ps_supplycost@7] - 
CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([o_orderkey@0], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13634779], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13634779..27269558], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27269558..27275584, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13628753], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13628753..27263530]]}, projection=[o_orderkey, o_orderdate] + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, l_suppkey@2)], projection=[s_nationkey@1, l_orderkey@2, l_partkey@3, l_suppkey@4, l_quantity@5, l_extendedprice@6, l_discount@7] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4 - ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_quantity@2 as l_quantity, l_extendedprice@3 as l_extendedprice, l_discount@4 as l_discount, s_nationkey@5 as s_nationkey, ps_supplycost@0 as ps_supplycost] + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, l_partkey@1)], projection=[l_orderkey@1, l_partkey@2, l_suppkey@3, l_quantity@4, l_extendedprice@5, l_discount@6] + ProjectionExec: expr=[p_partkey@0 as p_partkey] CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(ps_suppkey@1, l_suppkey@2), (ps_partkey@0, l_partkey@1)], projection=[ps_supplycost@2, l_orderkey@3, l_quantity@6, l_extendedprice@7, l_discount@8, s_nationkey@9] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([ps_suppkey@1, ps_partkey@0], 4), 
input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10445614], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:10445614..20890506, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..722], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:722..10446336], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:10446336..20891947]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([l_suppkey@2, l_partkey@1], 4), input_partitions=4 - ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_partkey@2 as l_partkey, l_suppkey@3 as l_suppkey, l_quantity@4 as l_quantity, l_extendedprice@5 as l_extendedprice, l_discount@6 as l_discount, s_nationkey@0 as s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, l_suppkey@2)], projection=[s_nationkey@1, l_orderkey@2, l_partkey@3, l_suppkey@4, l_quantity@5, l_extendedprice@6, l_discount@7] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=4 - RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([l_suppkey@2], 4), input_partitions=4 - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, l_partkey@1)], projection=[l_orderkey@1, l_partkey@2, l_suppkey@3, l_quantity@4, l_extendedprice@5, 
l_discount@6] - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([p_partkey@0], 4), input_partitions=4 - ProjectionExec: expr=[p_partkey@0 as p_partkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: p_name@1 LIKE %moccasin% - RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_name], predicate=p_name@1 LIKE %moccasin% - CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([l_partkey@1], 4), input_partitions=4 - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_orderkey, l_partkey, l_suppkey, l_quantity, l_extendedprice, l_discount] + FilterExec: p_name@1 LIKE %moccasin% + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_name], predicate=p_name@1 LIKE %moccasin% + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208317955]]}, projection=[l_orderkey, l_partkey, l_suppkey, l_quantity, l_extendedprice, l_discount] RaySQL Plan =========== -Query Stage #0 (1 -> 4): -ShuffleWriterExec(stage_id=0, output_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) - ParquetExec: file_groups={1 group: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet/part1.parquet]]}, projection=[n_nationkey, n_name] - -Query Stage #1 (4 -> 4): -ShuffleWriterExec(stage_id=1, output_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:0..13634779], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:13634779..27269558], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part1.parquet:27269558..27275584, home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:0..13628753], [home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet/part2.parquet:13628753..27263530]]}, projection=[o_orderkey, o_orderdate] - -Query Stage #2 (4 -> 4): -ShuffleWriterExec(stage_id=2, output_partitioning=Hash([Column { name: "ps_suppkey", index: 1 }, Column { name: "ps_partkey", index: 0 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:0..10445614], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part1.parquet:10445614..20890506, home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:0..722], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:722..10446336], [home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet/part2.parquet:10446336..20891947]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] - -Query Stage #3 (2 -> 4): -ShuffleWriterExec(stage_id=3, output_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet/part2.parquet]]}, projection=[s_suppkey, s_nationkey] - -Query Stage #4 (2 -> 4): 
-ShuffleWriterExec(stage_id=4, output_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) - ProjectionExec: expr=[p_partkey@0 as p_partkey] - CoalesceBatchesExec: target_batch_size=8192 - FilterExec: p_name@1 LIKE %moccasin% - ParquetExec: file_groups={2 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part1.parquet], [home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet/part2.parquet]]}, projection=[p_partkey, p_name], predicate=p_name@1 LIKE %moccasin% - -Query Stage #5 (4 -> 4): -ShuffleWriterExec(stage_id=5, output_partitioning=Hash([Column { name: "l_partkey", index: 1 }], 4)) - ParquetExec: file_groups={4 groups: [[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:0..52081165], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part1.parquet:52081165..104133200, home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:0..29130], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:29130..52110295], [home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet/part2.parquet:52110295..104191458]]}, projection=[l_orderkey, l_partkey, l_suppkey, l_quantity, l_extendedprice, l_discount] - -Query Stage #6 (4 -> 4): -ShuffleWriterExec(stage_id=6, output_partitioning=Hash([Column { name: "l_suppkey", index: 2 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(p_partkey@0, l_partkey@1)], projection=[l_orderkey@1, l_partkey@2, l_suppkey@3, l_quantity@4, l_extendedprice@5, l_discount@6] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=4, input_partitioning=Hash([Column { name: "p_partkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=5, input_partitioning=Hash([Column { name: "l_partkey", index: 1 }], 4)) - -Query Stage #7 (4 -> 4): -ShuffleWriterExec(stage_id=7, 
output_partitioning=Hash([Column { name: "l_suppkey", index: 2 }, Column { name: "l_partkey", index: 1 }], 4)) - ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_partkey@2 as l_partkey, l_suppkey@3 as l_suppkey, l_quantity@4 as l_quantity, l_extendedprice@5 as l_extendedprice, l_discount@6 as l_discount, s_nationkey@0 as s_nationkey] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_suppkey@0, l_suppkey@2)], projection=[s_nationkey@1, l_orderkey@2, l_partkey@3, l_suppkey@4, l_quantity@5, l_extendedprice@6, l_discount@7] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=3, input_partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=6, input_partitioning=Hash([Column { name: "l_suppkey", index: 2 }], 4)) - -Query Stage #8 (4 -> 4): -ShuffleWriterExec(stage_id=8, output_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) - ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_quantity@2 as l_quantity, l_extendedprice@3 as l_extendedprice, l_discount@4 as l_discount, s_nationkey@5 as s_nationkey, ps_supplycost@0 as ps_supplycost] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(ps_suppkey@1, l_suppkey@2), (ps_partkey@0, l_partkey@1)], projection=[ps_supplycost@2, l_orderkey@3, l_quantity@6, l_extendedprice@7, l_discount@8, s_nationkey@9] +Query Stage #0 (1 -> 1): +SortExec: expr=[nation@0 ASC NULLS LAST,o_year@1 DESC], preserve_partitioning=[false] + ProjectionExec: expr=[nation@0 as nation, o_year@1 as o_year, sum(profit.amount)@2 as sum_profit] + AggregateExec: mode=Single, gby=[nation@0 as nation, o_year@1 as o_year], aggr=[sum(profit.amount)] + ProjectionExec: expr=[n_name@0 as nation, date_part(YEAR, o_orderdate@5) as o_year, l_extendedprice@2 * (Some(1),20,0 - l_discount@3) - ps_supplycost@4 * l_quantity@1 as amount] 
CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=2, input_partitioning=Hash([Column { name: "ps_suppkey", index: 1 }, Column { name: "ps_partkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=7, input_partitioning=Hash([Column { name: "l_suppkey", index: 2 }, Column { name: "l_partkey", index: 1 }], 4)) - -Query Stage #9 (4 -> 4): -ShuffleWriterExec(stage_id=9, output_partitioning=Hash([Column { name: "s_nationkey", index: 3 }], 4)) - ProjectionExec: expr=[l_quantity@1 as l_quantity, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@4 as s_nationkey, ps_supplycost@5 as ps_supplycost, o_orderdate@0 as o_orderdate] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_orderdate@1, l_quantity@3, l_extendedprice@4, l_discount@5, s_nationkey@6, ps_supplycost@7] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=1, input_partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=8, input_partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 4)) - -Query Stage #10 (4 -> 4): -ShuffleWriterExec(stage_id=10, output_partitioning=Hash([Column { name: "nation", index: 0 }, Column { name: "o_year", index: 1 }], 4)) - AggregateExec: mode=Partial, gby=[nation@0 as nation, o_year@1 as o_year], aggr=[sum(profit.amount)] - ProjectionExec: expr=[n_name@0 as nation, date_part(YEAR, o_orderdate@5) as o_year, l_extendedprice@2 * (Some(1),20,0 - l_discount@3) - ps_supplycost@4 * l_quantity@1 as amount] - CoalesceBatchesExec: target_batch_size=8192 - HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_nationkey@0, s_nationkey@3)], projection=[n_name@1, l_quantity@2, l_extendedprice@3, l_discount@4, ps_supplycost@6, o_orderdate@7] - CoalesceBatchesExec: target_batch_size=8192 - 
ShuffleReaderExec(stage_id=0, input_partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4)) - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=9, input_partitioning=Hash([Column { name: "s_nationkey", index: 3 }], 4)) - -Query Stage #11 (4 -> 4): -ShuffleWriterExec(stage_id=11, output_partitioning=Hash([Column { name: "nation", index: 0 }, Column { name: "o_year", index: 1 }], 4)) - SortExec: expr=[nation@0 ASC NULLS LAST,o_year@1 DESC], preserve_partitioning=[true] - ProjectionExec: expr=[nation@0 as nation, o_year@1 as o_year, sum(profit.amount)@2 as sum_profit] - AggregateExec: mode=FinalPartitioned, gby=[nation@0 as nation, o_year@1 as o_year], aggr=[sum(profit.amount)] - CoalesceBatchesExec: target_batch_size=8192 - ShuffleReaderExec(stage_id=10, input_partitioning=Hash([Column { name: "nation", index: 0 }, Column { name: "o_year", index: 1 }], 4)) - -Query Stage #12 (4 -> 1): -SortPreservingMergeExec: [nation@0 ASC NULLS LAST,o_year@1 DESC] - ShuffleReaderExec(stage_id=11, input_partitioning=Hash([Column { name: "nation", index: 0 }, Column { name: "o_year", index: 1 }], 4)) + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(n_nationkey@0, s_nationkey@3)], projection=[n_name@1, l_quantity@2, l_extendedprice@3, l_discount@4, ps_supplycost@6, o_orderdate@7] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/nation.parquet]]}, projection=[n_nationkey, n_name] + ProjectionExec: expr=[l_quantity@1 as l_quantity, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@4 as s_nationkey, ps_supplycost@5 as ps_supplycost, o_orderdate@0 as o_orderdate] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(o_orderkey@0, l_orderkey@0)], projection=[o_orderdate@1, l_quantity@3, l_extendedprice@4, l_discount@5, s_nationkey@6, ps_supplycost@7] + ParquetExec: file_groups={1 group: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/orders.parquet:0..54530383]]}, projection=[o_orderkey, o_orderdate] + ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_quantity@2 as l_quantity, l_extendedprice@3 as l_extendedprice, l_discount@4 as l_discount, s_nationkey@5 as s_nationkey, ps_supplycost@0 as ps_supplycost] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(ps_suppkey@1, l_suppkey@2), (ps_partkey@0, l_partkey@1)], projection=[ps_supplycost@2, l_orderkey@3, l_quantity@6, l_extendedprice@7, l_discount@8, s_nationkey@9] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/partsupp.parquet:0..41091907]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost] + ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_partkey@2 as l_partkey, l_suppkey@3 as l_suppkey, l_quantity@4 as l_quantity, l_extendedprice@5 as l_extendedprice, l_discount@6 as l_discount, s_nationkey@0 as s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(s_suppkey@0, l_suppkey@2)], projection=[s_nationkey@1, l_orderkey@2, l_partkey@3, l_suppkey@4, l_quantity@5, l_extendedprice@6, l_discount@7] + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/supplier.parquet]]}, projection=[s_suppkey, s_nationkey] + CoalesceBatchesExec: target_batch_size=8192 + HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(p_partkey@0, l_partkey@1)], projection=[l_orderkey@1, l_partkey@2, l_suppkey@3, l_quantity@4, l_extendedprice@5, l_discount@6] + ProjectionExec: expr=[p_partkey@0 as p_partkey] + CoalesceBatchesExec: target_batch_size=8192 + FilterExec: p_name@1 LIKE %moccasin% + ParquetExec: file_groups={1 group: [[home/runner/work/datafusion-ray/datafusion-ray/data/part.parquet]]}, projection=[p_partkey, p_name], predicate=p_name@1 LIKE %moccasin% + ParquetExec: file_groups={1 group: 
[[home/runner/work/datafusion-ray/datafusion-ray/data/lineitem.parquet:0..208317955]]}, projection=[l_orderkey, l_partkey, l_suppkey, l_quantity, l_extendedprice, l_discount] From 6412e289e5c771aa6b7d15b787a6255a2cc8e3e5 Mon Sep 17 00:00:00 2001 From: Edmondo Porcu Date: Fri, 4 Oct 2024 08:37:43 -0400 Subject: [PATCH 11/14] Introducing regexp for determinism --- Cargo.lock | 13 +++++++------ Cargo.toml | 1 + src/planner.rs | 10 +++++++++- 3 files changed, 17 insertions(+), 7 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ae66c8d..c0bea07 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1126,6 +1126,7 @@ dependencies = [ "prost 0.12.6", "prost-types 0.12.6", "pyo3", + "regex", "rustc_version", "tokio", "tonic-build", @@ -2490,9 +2491,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.10.6" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4219d74c6b67a3654a9fbebc4b419e22126d13d2f3c4a07ee0cb61ff79a79619" +checksum = "38200e5ee88914975b69f657f0801b6f6dccafd44fd9326302a4aaeecfacb1d8" dependencies = [ "aho-corasick", "memchr", @@ -2502,9 +2503,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.7" +version = "0.4.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df" +checksum = "368758f23274712b504848e9d5a6f010445cc8b87a7cdb4d7cbee666c1288da3" dependencies = [ "aho-corasick", "memchr", @@ -2519,9 +2520,9 @@ checksum = "53a49587ad06b26609c52e423de037e7f57f20d53535d66e08c695f347df952a" [[package]] name = "regex-syntax" -version = "0.8.4" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" +checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" [[package]] name = "reqwest" diff --git a/Cargo.toml b/Cargo.toml index bf7c661..53bb6c0 100644 --- a/Cargo.toml +++ 
b/Cargo.toml @@ -48,6 +48,7 @@ tonic-build = { version = "0.8", default-features = false, features = ["transpor [dev-dependencies] anyhow = "1.0.89" pretty_assertions = "1.4.0" +regex = "1.11.0" [lib] name = "datafusion_ray" diff --git a/src/planner.rs b/src/planner.rs index 40934fe..2395676 100644 --- a/src/planner.rs +++ b/src/planner.rs @@ -265,6 +265,7 @@ mod test { use datafusion::physical_plan::displayable; use datafusion::prelude::{ParquetReadOptions, SessionConfig, SessionContext}; use pretty_assertions::assert_eq; + use regex::Regex; use std::path::Path; use std::{env, fs}; type TestResult = std::result::Result; @@ -429,7 +430,14 @@ mod test { fs::write(&expected_file, &output)?; } let expected_plan = fs::read_to_string(&expected_file)?; - assert_eq!(expected_plan, output); + + let re = Regex::new(r":[^]]*]")?; + + // Remove the byte offsets from the plans, seems non repeatable + // between CI/CD and local + let cleaned_expected_plan = re.replace_all(&expected_plan, "]"); + let cleaned_output = re.replace_all(&output, "]"); + assert_eq!(cleaned_expected_plan, cleaned_output); Ok(()) } } From c7acc982c4a1baffd1847a40548d5d360c225e19 Mon Sep 17 00:00:00 2001 From: Edmondo Porcu Date: Fri, 4 Oct 2024 08:53:53 -0400 Subject: [PATCH 12/14] Ignored additional tests --- src/planner.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/planner.rs b/src/planner.rs index 2395676..78dcc5d 100644 --- a/src/planner.rs +++ b/src/planner.rs @@ -300,6 +300,7 @@ mod test { do_test(6).await } + #[ignore = "non-deterministic"] #[tokio::test] async fn test_q7() -> TestResult<()> { do_test(7).await @@ -325,6 +326,7 @@ mod test { do_test(11).await } + #[ignore = "non-deterministic"] #[tokio::test] async fn test_q12() -> TestResult<()> { do_test(12).await @@ -346,6 +348,10 @@ mod test { do_test(15).await } + // This test is ignored because there is some non-determinism + // in a part of the plan, see + // 
https://github.com/edmondop/datafusion-ray/actions/runs/11180062292/job/31080996808" + #[ignore = "non-deterministic"] #[tokio::test] async fn test_q16() -> TestResult<()> { do_test(16).await From 9f9c28f1f25634c6330f4a0cdd4c861a5555c993 Mon Sep 17 00:00:00 2001 From: Edmondo Porcu Date: Fri, 4 Oct 2024 09:02:18 -0400 Subject: [PATCH 13/14] Ignored additional tests --- src/planner.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/planner.rs b/src/planner.rs index 78dcc5d..0d30552 100644 --- a/src/planner.rs +++ b/src/planner.rs @@ -300,7 +300,7 @@ mod test { do_test(6).await } - #[ignore = "non-deterministic"] + #[ignore = "non-deterministic IN clause"] #[tokio::test] async fn test_q7() -> TestResult<()> { do_test(7).await @@ -326,7 +326,7 @@ mod test { do_test(11).await } - #[ignore = "non-deterministic"] + #[ignore = "non-deterministic IN clause"] #[tokio::test] async fn test_q12() -> TestResult<()> { do_test(12).await @@ -351,7 +351,7 @@ mod test { // This test is ignored because there is some non-determinism // in a part of the plan, see // https://github.com/edmondop/datafusion-ray/actions/runs/11180062292/job/31080996808" - #[ignore = "non-deterministic"] + #[ignore = "non-deterministic IN clause"] #[tokio::test] async fn test_q16() -> TestResult<()> { do_test(16).await @@ -367,6 +367,7 @@ mod test { do_test(18).await } + #[ignore = "non-deterministic IN clause"] #[tokio::test] async fn test_q19() -> TestResult<()> { do_test(19).await From 6fa7c65827e98fc28c02489d6694c4ec47c3a1bc Mon Sep 17 00:00:00 2001 From: Edmondo Porcu Date: Fri, 4 Oct 2024 09:20:52 -0400 Subject: [PATCH 14/14] Update README.md --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index dc3ba9e..b3576fc 100644 --- a/README.md +++ b/README.md @@ -130,7 +130,9 @@ maturin develop python -m pytest ``` Running local Rust tests require generating the tpch-data. 
This can be done by running the following command: -```bash ./scripts/generate_tpch_data.sh ``` +```bash +./scripts/generate_tpch_data.sh +``` Tests compare plans with expected plans, which unfortunately contain the path to the parquet tables. The path committed under version control is @@ -138,9 +140,7 @@ the one of a Github Runner, and won't work locally. You can fix it by running the following command: ```bash - ./scripts/replace-expected-plan-paths.sh local-dev - ```` When instead you need to regenerate the plans, which you can do by